\documentclass[hidelinks,11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{fullpage}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{array}
\usepackage{color}
\usepackage{graphicx}
\usepackage{float}
\usepackage{hyperref}
\usepackage{listings}
\usepackage[margin=.9in]{geometry}
\usepackage{setspace}
\usepackage{natbib}
\usepackage{proof}
\usepackage{multirow}
\usepackage{hhline}
\usepackage{wrapfig}
\usepackage [english]{babel}
\usepackage{grffile}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage[tikz]{bclogo}
\usetikzlibrary{chains}
\usetikzlibrary{positioning}
\usetikzlibrary{arrows}
\usepackage{lscape}
\usepackage [autostyle, english = american]{csquotes}
\usepackage{enumitem}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{indentfirst}
\usepackage{pdfpages}
\usepackage{booktabs}
\newcommand{\tabitem}{~~\llap{\textbullet}~~}
\bibliographystyle{apsr}
\bibpunct{(}{)}{;}{a}{,}{,}
\DeclareGraphicsExtensions{.pdf,.png,.jpg}
\setlength{\tabcolsep}{.18cm}
\usepackage{fancyvrb}
\usepackage{numprint}
\npthousandsep{,}
\usepackage{etoc}
\usepackage{sectsty}
\sectionfont{\fontsize{13}{13}\selectfont}
\subsectionfont{\fontsize{11}{11}\selectfont}

\usepackage{titlesec}

% Restart the section, figure, and table counters whenever the part counter is
% stepped, so numbering begins again in each \part of the document.
\makeatletter
\@addtoreset{section}{part}
\@addtoreset{figure}{part}
\@addtoreset{table}{part}
\makeatother
% Part headings (titlesec): "display" shape in large bold type, with an empty
% label field and 0pt separation between the (empty) label and the title.
\titleformat{\part}[display]
{\normalfont\LARGE\bfseries}{}{0pt}{}

% Fixed-width table columns that take the column width as their argument:
%   L{<width>} ragged-right, C{<width>} centered, R{<width>} ragged-left.
% Each is an m{<width>} paragraph column from the array package. The alignment
% command redefines \\, so \newline is first aliased to that \\ (to keep a way
% of breaking lines inside a cell) and \arraybackslash then restores \\ as the
% row terminator. \hspace{0pt} allows the first word of a cell to be hyphenated.
\newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
\newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
\newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}

% Table-of-contents entry layout. The arguments of \@dottedtocline are
% {<toc level>}{<left indent>}{<width reserved for the number>}; sections,
% subsections, and subsubsections are given progressively deeper indents.
\makeatletter
\renewcommand{\l@section}{\@dottedtocline{1}{1.5em}{2.6em}}
\renewcommand{\l@subsection}{\@dottedtocline{2}{4.0em}{3.6em}}
\renewcommand{\l@subsubsection}{\@dottedtocline{3}{7.4em}{4.5em}}
\makeatother

% ==Cross Referencing Different Docs
\usepackage{xr}
\externaldocument{RefugeesCivilWar_SI_APSRfinal}

\begin{document}

<<eval=TRUE, echo=FALSE, results='hide', message=FALSE>>= 

# Session-wide knitr defaults and R printing options for this document.
library(knitr)

# Chunk defaults (individual chunks may override them), listed in this order:
#   caching        - remove cache = TRUE before we submit, but keep the local
#                    cache for faster compilation
#   code / output  - tidy, echo, warning, message
#   figures        - fig.path, fig.pos, dev, dpi
opts_chunk$set(
  cache = TRUE,
  cache.path = 'cache_paper/',
  tidy = TRUE,
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  fig.path = 'figures_paper/',
  fig.pos = 't!',
  dev = 'pdf',
  dpi = 200
)

# Printing: two significant digits, 110-character console width.
options(digits = 2, width = 110)

@

<<eval=TRUE, echo=FALSE, results='hide', message=FALSE>>= 

# Load every input data set used in the paper and pre-compute the interaction
# columns that the models refer to by name.
#
# All files are read relative to Paper_Inputs/. The map chunk below calls
# setwd("Paper_Inputs") again, so the directory change is evidently not relied
# upon to persist across chunks.
setwd("Paper_Inputs")

### Load functions and packages
source("RefCivFunctions.R")

# Note: rtb is the main treatment variable -- it refers to refugee informal settlements or formal camps
# rcb is presence of refugee camps 
# rsb is presence of refugee settlements 
#
# Naming convention for the derived columns: <a>_<b> is the product a * b.
# nlights_calib_mean is multiplied by 100 in every GED panel
# (NOTE(review): presumably to rescale the nighttime-lights measure for
# presentation -- confirm; the Africa population panels are not rescaled here).

##### Load GED 2020 data #####
## Main data
geddata <- read.csv("panel.full_GED_2020.csv", header = TRUE) # GED panel with all provinces
geddatasub <- read.csv("panel.subset_GED_2020.csv", header = TRUE) # subset GED panel to provinces in countries with rtb 
geddatasub2 <- read.csv("panel.dynamic.subset_GED_2020.csv", header = TRUE) # subset main panel to provinces in countries with rtb dynamically 

geddata$nlights_calib_mean <- geddata$nlights_calib_mean*100
geddatasub$nlights_calib_mean <- geddatasub$nlights_calib_mean*100
geddatasub2$nlights_calib_mean <- geddatasub2$nlights_calib_mean*100

geddata$rtb_rtb.other <- geddata$rtb*geddata$rtb.other # interaction variable for ease of presentation
geddatasub$rtb_rtb.other <- geddatasub$rtb*geddatasub$rtb.other
geddatasub2$rtb_rtb.other <- geddatasub2$rtb*geddatasub2$rtb.other

geddata$rcb_rcb.other <- geddata$rcb*geddata$rcb.other 
geddatasub$rcb_rcb.other <- geddatasub$rcb*geddatasub$rcb.other
geddatasub2$rcb_rcb.other <- geddatasub2$rcb*geddatasub2$rcb.other

geddata$rsb_rsb.other <- geddata$rsb*geddata$rsb.other 
geddatasub$rsb_rsb.other <- geddatasub$rsb*geddatasub$rsb.other
geddatasub2$rsb_rsb.other <- geddatasub2$rsb*geddatasub2$rsb.other

# New / not-new site indicators interacted with rtb.other (subset panel only)
geddatasub$new_site_rt_1_rtb.other <- geddatasub$new_site_rt_1*geddatasub$rtb.other  
geddatasub$new_site_rt_2_rtb.other <- geddatasub$new_site_rt_2*geddatasub$rtb.other
geddatasub$no_new_site_rt_1_rtb.other <- geddatasub$no_new_site_rt_1*geddatasub$rtb.other
geddatasub$no_new_site_rt_2_rtb.other <- geddatasub$no_new_site_rt_2*geddatasub$rtb.other

geddata$rtb_nearborder <- geddata$rtb*geddata$nearborder # interaction variable for ease of presentation
geddatasub$rtb_nearborder <- geddatasub$rtb*geddatasub$nearborder

geddata$rtb_best_neighbors_sum_binary <- geddata$rtb*geddata$best_neighbors_sum_binary
geddatasub$rtb_best_neighbors_sum_binary <- geddatasub$rtb*geddatasub$best_neighbors_sum_binary

geddata$rtb_best_foreign_neighbors_sum_binary <- geddata$rtb*geddata$best_foreign_neighbors_sum_binary
geddatasub$rtb_best_foreign_neighbors_sum_binary <- geddatasub$rtb*geddatasub$best_foreign_neighbors_sum_binary

## rtb placebo data
plagedrtb <- read.csv("panel.placebo.rtb.t1_GED_2020.csv", header = TRUE) # placebo data for rtb, all provinces
plagedrtbsub <- read.csv("panel.placebo.rtb.t1.subset_GED_2020.csv", header = TRUE) # subset placebo rtb to provinces in countries with rtb 
plagedrtbsub2 <- read.csv("panel.placebo.dynamic.subset.rtb.t1_GED_2020.csv", header = TRUE) # rtb placebo counterpart of the dynamic subset panel

plagedrtb$nlights_calib_mean <- plagedrtb$nlights_calib_mean*100
plagedrtbsub$nlights_calib_mean <- plagedrtbsub$nlights_calib_mean*100
plagedrtbsub2$nlights_calib_mean <- plagedrtbsub2$nlights_calib_mean*100

plagedrtb$rtb.placebo_rtb.other <- plagedrtb$rtb.placebo*plagedrtb$rtb.other 
plagedrtbsub$rtb.placebo_rtb.other <- plagedrtbsub$rtb.placebo*plagedrtbsub$rtb.other
plagedrtbsub2$rtb.placebo_rtb.other <- plagedrtbsub2$rtb.placebo*plagedrtbsub2$rtb.other

plagedrtb$rtb.placebo_nearborder <- plagedrtb$rtb.placebo*plagedrtb$nearborder 
plagedrtbsub$rtb.placebo_nearborder <- plagedrtbsub$rtb.placebo*plagedrtbsub$nearborder

plagedrtb$rtb.placebo_best_neighbors_sum_binary <- plagedrtb$rtb.placebo*plagedrtb$best_neighbors_sum_binary
plagedrtbsub$rtb.placebo_best_neighbors_sum_binary <- plagedrtbsub$rtb.placebo*plagedrtbsub$best_neighbors_sum_binary

plagedrtb$rtb.placebo_best_foreign_neighbors_sum_binary <- plagedrtb$rtb.placebo*plagedrtb$best_foreign_neighbors_sum_binary
plagedrtbsub$rtb.placebo_best_foreign_neighbors_sum_binary <- plagedrtbsub$rtb.placebo*plagedrtbsub$best_foreign_neighbors_sum_binary

## rcb placebo data
plagedrcb <- read.csv("panel.placebo.rcb.t1_GED_2020.csv", header = TRUE) # placebo data for rcb, all provinces
plagedrcbsub <- read.csv("panel.placebo.rcb.t1.subset_GED_2020.csv", header = TRUE) # subset placebo rcb to provinces in countries with rcb 
plagedrcbsub2 <- read.csv("panel.placebo.dynamic.subset.rcb.t1_GED_2020.csv", header = TRUE) # rcb placebo counterpart of the dynamic subset panel

plagedrcb$nlights_calib_mean <- plagedrcb$nlights_calib_mean*100
plagedrcbsub$nlights_calib_mean <- plagedrcbsub$nlights_calib_mean*100
plagedrcbsub2$nlights_calib_mean <- plagedrcbsub2$nlights_calib_mean*100

plagedrcb$rcb.placebo_rcb.other <- plagedrcb$rcb.placebo*plagedrcb$rcb.other 
plagedrcbsub$rcb.placebo_rcb.other <- plagedrcbsub$rcb.placebo*plagedrcbsub$rcb.other
plagedrcbsub2$rcb.placebo_rcb.other <- plagedrcbsub2$rcb.placebo*plagedrcbsub2$rcb.other

## rsb placebo data
plagedrsb <- read.csv("panel.placebo.rsb.t1_GED_2020.csv", header = TRUE) # placebo data for rsb, all provinces
plagedrsbsub <- read.csv("panel.placebo.rsb.t1.subset_GED_2020.csv", header = TRUE) # subset placebo rsb to provinces in countries with rsb 
plagedrsbsub2 <- read.csv("panel.placebo.dynamic.subset.rsb.t1_GED_2020.csv", header = TRUE) # rsb placebo counterpart of the dynamic subset panel

plagedrsb$nlights_calib_mean <- plagedrsb$nlights_calib_mean*100
plagedrsbsub$nlights_calib_mean <- plagedrsbsub$nlights_calib_mean*100
plagedrsbsub2$nlights_calib_mean <- plagedrsbsub2$nlights_calib_mean*100

plagedrsb$rsb.placebo_rsb.other <- plagedrsb$rsb.placebo*plagedrsb$rsb.other
plagedrsbsub$rsb.placebo_rsb.other <- plagedrsbsub$rsb.placebo*plagedrsbsub$rsb.other
plagedrsbsub2$rsb.placebo_rsb.other <- plagedrsbsub2$rsb.placebo*plagedrsbsub2$rsb.other

##### Load Africa population data #####
gedpopafrica <- read.csv("panel.subset_Africa_population.csv", header = TRUE) # subset with refugee population in Africa
plagedpopafrica <- read.csv("panel.subset_Africa_population.placebo.rtb.t1_GED_2020.csv", header = TRUE) # placebo version

gedpopafrica$RefPop_Ratio <- gedpopafrica$Total/gedpopafrica$pop # ratio of refugee population to local population

gedpopafrica$large_ref_population_rtb.other <- gedpopafrica$large_ref_population*gedpopafrica$rtb.other  
gedpopafrica$small_ref_population_rtb.other <- gedpopafrica$small_ref_population*gedpopafrica$rtb.other

plagedpopafrica$rtb.placebo_rtb.other <- plagedpopafrica$rtb.placebo*plagedpopafrica$rtb.other  

##### Load UNHCR location data #####
# subset UNHCR data to refugee sites, camps, and settlements open within study period
allsites <- read.csv("camps_settlements_processed.csv", header = TRUE) # in our study period
# refsites = camps and settlements together; refcamps / refsettlements = each type alone
refsites <- allsites[allsites$loc_type == "Refugee Camp" |
                     allsites$loc_type == "Refugee Settlement",]

refcamps <- allsites[allsites$loc_type == "Refugee Camp",]

refsettlements <- allsites[allsites$loc_type == "Refugee Settlement",]

@

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \title{\LARGE{Reexamining the Effect of Refugees on Civil Conflict:\\A Global Subnational Analysis}\thanks{We are indebted to experts from the United Nations High Commissioner for Refugees (UNHCR): Jason Hepps, Ewen Macleod, Raouf Mazou, Brett Moore, Petra Nahmias, Htun Zaw Oo, Yvon Orand, Kimberly Roberson, Karl Steinacker, Tayyar Sukru Cansizoglu, Vicky Tennant, Alex Tyler, and staff members of the Field Information and Coordination Section (FICSS) for providing data and guidance. We are also grateful to Idean Salehyan and Kristian Skrede Gleditsch for sharing their data and code; to Tsering Wangyal Shawa for GIS expertise; and to Joshua Angelo, Lesley Chavez, Zachary Durkee, Nia Gooding, Amy Hu, Caterina Hyneman, Jack Jacobs, Karina Martinez, Michael Nachman, Nicholas Nobles, Amanda Ostrom, Meghna Ray, Alexander Rounaghi, Jared Solomon, Zach Stevens, and Nicholas Woo for excellent research assistance. We thank Lamis Abdelaaty, Alexander Betts, Mietek Boduszynski, David Carter, Miguel Centeno, Arjun Chowdhury, Rafaela Dancygier, Benjamin Fifield, Oscar Gil-Garcia, Guy Grossman, Kosuke Imai, Robert Keohane, Kabir Khanna, Matthew Kocher, Evan Lieberman, Daniel Masterson, Jean-François Maystadt, Julia Morse, Jacob Shapiro, the Imai Research Group, and participants from ISA 2014, APSA 2015, the 2017 Harvard-MIT-Yale Political Violence Conference, the Empirical Studies of Conflict 2019 Annual Meeting, and Pomona College's International Relations Program for helpful comments. Shaver acknowledges funding support from Princeton University's Institute for International and Regional Studies. Zhou acknowledges funding support from the National Science Foundation (SES--1148900). 
Replication materials are available via Harvard University's Dataverse.}
}

\author{Yang-Yang Zhou\thanks{Assistant Professor, Department of Political Science, University of British Columbia, \href{mailto:yangyang.zhou@ubc.ca}{yangyang.zhou@ubc.ca}, \href{https://www.yangyangzhou.com/}{www.yangyangzhou.com}}
\hspace{.02cm} and Andrew Shaver\thanks{Assistant Professor, Department of Political Science, University of California, Merced. Affiliate, CISAC, Stanford University. Email: \href{mailto:ashaver@ucmerced.edu}{ashaver@ucmerced.edu}, \href{https://www.andrewcshaver.com}{www.andrewcshaver.com}}
\thanks{Authors contributed equally. Author order was randomized using \url{https://randomizeauthor.shinyapps.io/shiny}}
}

\date{\today}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\renewcommand{\harvardurl}{URL: \url}

\maketitle

\begin{abstract} % 150 words
\singlespacing
  \noindent A large literature suggests that the presence of refugees is associated with greater risk of conflict. We argue that the positive effects of hosting refugees on local conditions have been overlooked. Using global data from 1990 to 2018 on locations of refugee communities and civil conflict at the subnational level, we find no evidence that hosting refugees increases the likelihood of new conflict, prolongs existing conflict, or raises the number of violent events or casualties. Furthermore, we explore conditions where provinces are likely to experience substantively large \textit{decreases} in conflict risk due to increased development. Analysis examining nighttime lights as a measure of development, coupled with expert interviews, supports our claim. To address the possibility of selection bias, we use placebo tests and matching. Our research challenges assertions that refugees are security risks. Instead, we show that in many cases, hosting refugees can encourage local development and even conflict reduction.\\
\\
  
\noindent \textbf{Keywords:} refugees, migration, conflict, violence, subnational, GIS\\
%\noindent \textbf{Word count:} 
\end{abstract}

\pagenumbering{gobble}

\newpage
\setstretch{1.87}

\pagenumbering{arabic}
\setcounter{page}{1}

\addtocontents{toc}{\protect\setcounter{tocdepth}{0}}

% RESEARCH QUESTION
Does the presence of refugees affect the likelihood of domestic conflict in hosting areas? Understanding the potential security threat of refugees is a major concern to conflict and migration scholars. Given the growing scale of the contemporary refugee crisis and increasing public backlash toward refugee communities around the world, this research question is also of political and policy relevance \citep{Whitaker:2017,UNHCR:2020}. The existing research largely contends that hosting refugees is associated with increased risk of conflict. As \cite{fisk2020forced} notes in a review piece on this subject: ``much of the discussion in the security studies context continues to center on forced migration flows as a conduit for civil war, international terrorism, and refugees as perpetrators.'' These studies point to possible tensions with local citizens that are exacerbated through resource competition or ethnic rivalry, or how refugees may themselves be combatants and perpetrators of violence \citep[e.g.][]{Zolberg:1989,Lischer:2006,Salehyan:2006,Ruegger:2017}. Others argue that in these conflicts, refugees are typically victims \citep[e.g.][]{Onoma:2013,Fisk:2018,Bohmelt:2019,Savun:2019}. Regardless, perceptions of refugees as destabilizing forces are pervasive in the literature and political rhetoric. In this paper, we theorize that the positive effects of hosting refugee communities on local economic activity and development can offset and even outweigh the potentially destabilizing effects. We argue and empirically substantiate that hosting refugees generally has \textit{no effect} on civil conflict at the local level. Furthermore, we find that under certain conditions where investments in local development are more likely, refugee settlement has \textit{negative effects} on conflict likelihood. Thus, while there are certainly cases of refugee-related conflict, we contend that these are exceptional, not the norm.

% MOTIVATING CASES FROM GREAT LAKES REGION AND SYRIA
As a prominent example of regional civil conflict diffusion coinciding with large refugee populations, scholars and policymakers alike have pointed to Central Africa and the Great Lakes Region from the early 1990s. This period saw instances of refugees actively involved in new conflicts as ``refugee-warrior communities,'' such as the 1994 Rwandan refugees who went into the eastern regions of the Democratic Republic of Congo (DRC), which led to the outbreak of civil conflict there in 1996 and 1998 \citep{Zolberg:1989,Whitaker:2003,Braithwaite:2010}. On the other hand, refugees from Burundi, the DRC, and Rwanda fled to western Tanzania at the same time, drastically increasing the overall population of this region by over 50\%. Yet Tanzania did not experience conflict onset, and in some cases local host communities even benefited through increased development and economic activity \citep{Whitaker:2002,Whitaker:2003,Alix:2009,Maystadt:2018}. Similarly, refugee-hosting regions in Uganda and northwest Kenya have experienced not only peaceful coexistence with local communities, but also improvements in transport infrastructure, access to health care, and state security presence \citep{Jacobsen:2002livelihoods,Sanghi:2016}. 

Consider the Syrian refugee crisis, which remains the largest displacement crisis. As of 2020, Turkey and Jordan are hosting the vast majority of Syrian refugees at over 4.2 million. Even after receiving massive numbers of refugees, subsequent levels of conflict in these countries have been surprisingly low. Refugees are not recruited into fighting, and humanitarian aid to refugees, rather than fueling violent mobilization as some have theorized, has instead bolstered local economies and decreased tensions from local hosts toward refugees \citep{akgunduz2018impact, altindag2019blessing,Lehmann:2020,Masterson:2019}. These examples question whether cases of refugee presence contributing to conflict are the norm or exceptional. They also highlight certain factors associated with hosting refugees, particularly opportunities for development, that could contribute to reducing the risk of conflict.

% SUMMARIZE ARGUMENTS
We argue that hosting refugees does not increase the risk of domestic conflict, measured in a variety of ways. Drawing on previous studies that find positive or mixed economic and development effects of refugee communities on host areas, we theorize that gains in local development can offset and even outweigh factors that would negatively affect security. These gains come from increased economic activity, state presence, and humanitarian aid efforts, which can have a notable impact on the often peripheral, poor areas that host refugees. In cases where these resources and efforts can be most efficiently allocated, for example when refugee communities are geographically concentrated in relatively large numbers within a country, these areas may even experience decreased risk of conflict. 

% HIGHLIGHT DATA & MIXED METHODS
Empirically, this paper provides quantitative evidence at a global, subnational scale of the effect of refugee communities on conflict likelihood where they settle. We provide new data, gathered in collaboration with the United Nations High Commissioner for Refugees (UNHCR) Field Information and Coordination Section (FICSS), on all geo-coded sites of refugee communities around the world (formal camps and informal settlements), which we pair with geo-coded conflict outcomes and other development-related data at the subnational level spanning 1990 to 2018. We complement our quantitative analysis and inform our theory building through expert interviews conducted with a dozen current or former UNHCR senior staff to help further contextualize refugee-hosting dynamics.

% SUMMARIZE FINDINGS 
We find no evidence that the presence of refugees leads to new conflict, prolongs ongoing conflict, or increases the intensity of conflict. This null effect is persistent across a variety of specifications and robustness checks. In a secondary analysis, we find that when refugee communities are geographically concentrated -- meaning they are only present within one province in a given country-year -- in comparison to similar provinces in non-hosting countries, these hosting provinces see their likelihood of conflict onset halved and other outcomes substantially decreased. We call this the \emph{conditional risk reduction effect}. To further unpack these effects, we conduct additional heterogeneous and subgroup effects analyses by comparing provinces that host new versus older refugee sites, formal refugee camps versus settlements, large versus small refugee populations, those located near the border versus in the country's interior, and by region. We find that if refugee sites are geographically concentrated within a country, have had more time to be established, and contain relatively large numbers of refugees, then their hosting provinces experience substantively large \emph{decreases} in conflict risk and intensity. Under these conditions, there is generally a greater presence of UNHCR and other aid organizations; thus, local development spurred by refugee settlement should be more likely. Indeed, using nighttime lights data to proxy for development, we find supportive evidence of increased development under these conditions. To preclude the possibility that refugees select into more developed and secure areas, which may bias our results, we use placebo tests to show that there are no effects of future refugee-hosting on present outcomes as well as matching methods for causal inference with panel data.

% CONTRIBUTIONS/POLICY IMPLICATIONS 
Our research makes several important contributions to scholarship, policy, and current political debates about refugees. We respond to the security studies literature, which links refugees to increased conflict, directly challenging conventional views on the subject. Additionally, by engaging with a growing literature in political science, migration studies, and development economics on the potential positive effects of refugee-hosting on local development, we not only bridge academic literatures, but we contribute to the latter through our global analysis of the localized effects of refugee presence on development. For policymakers, our results showing that refugee-hosting generally has no effect on conflict risk, and in some cases, a substantial negative effect, strongly refute political arguments for putting up physical and legal barriers to restrict the acceptance of refugees due to security concerns. Instead, our findings suggest that policymakers and humanitarian actors involved in refugee settlement should focus on opportunities to increase state capacity and develop infrastructure in hosting areas.


\section*{Theorizing Refugees as Security Risks or Development Opportunities}
\label{sec:theory}

% DEFINITIONS 
First, we define recurring concepts for clarity. Our study focuses on the effects of the presence of \textit{refugee communities}, which are visible groups of refugees, as opposed to individuals or households who settle dispersed within host communities or urban contexts.\footnote{A \textit{refugee} is someone who ``owing to a well-founded fear of being persecuted for reasons of race, religion, nationality, membership of a particular social group or political opinion, is outside the country of her or his nationality and is unable, or due to such fear, unwilling to avail him- or herself of the protection of that country.'' (Article 1, The 1951 Convention Relating to the Status of Refugees).}\textsuperscript{,}\footnote{We do not have a size threshold in our primary analysis, as we will describe further later in this paper; however, we carry out additional analyses using direct measures of population size in robustness checks.}
The term \textit{refugee sites} in this article refers to the precise geographic locations where refugee communities settle; it encompasses both \textit{formal camps} as well as \textit{informal settlements} (even those in which the UNHCR is not actively working or has no mandate); unless there is a visible community within a city, it does not include major cities where scattered individual refugees may have settled. According to our discussions with UNHCR officials, whether a refugee site is recognized as a ``camp'' or ``settlement'' is usually designated by the host state rather than the UNHCR.\footnote{UNHCR operational protocols refer to camps and settlements together without explicitly differentiating the two: \url{https://www.unhcr.org/uk/448d6c122.pdf}.} Camps are generally associated with more state control and international recognition while settlements are typically informal areas of refugee communities. Between the two, there is no clear delineation based on demographic information such as population sizes or levels of aid.\footnote{Interview conducted with the Field Information and Coordination Support Section (FICSS) of UNHCR on December 30, 2013.}
By \textit{conflict}, we examine several outcomes related to armed civil conflicts
-- onset (new conflict), incidence (continued conflict), number of violent events, and number of battle deaths.\footnote{These are defined further in our description of the variables. The body of research connecting refugees to civil conflict is rather large. In some cases, refugees are associated with the outbreak of conflict. In others, they are associated with the continuation of conflict or changes in its intensity. Rather than limit our focus to a particular outcome (e.g. onset), we consider the relationship between refugee communities and various conflict outcomes. When referring to conflict in this research, except where explicitly stated, we are referring to civil conflict generally.} 
Finally, our geographic units of interest are \textit{provinces}, the first-order administrative units within each country (also referred to as states, governorates, regions, etc.; we use ``province'' for simplicity).

% BRIEF OVERVIEW
Next, we briefly review research and proposed mechanisms that link refugees to increased risk of conflict. We consider how refugee communities may affect conflict either directly or indirectly. Recognizing that instances in which refugees contributed to conflict exist and are therefore notable, we contend that these cases are rare. Instead, we theorize that the modal outcome is when refugee communities and their hosts simply co-exist in a manner that leaves the status quo on conflict unchanged. Under such circumstances, tensions may emerge, for instance, over resources. Yet, rather than devolving into civil conflict, as our expert interviews highlight, other means of dispute resolution often prevail. Additionally, refugees can revitalize local economies as both producers and consumers, as well as attract development aid from humanitarian and state actors. These gains can lead to reductions in conflict risk. Drawing from a broad body of evidence, we claim that the potential stabilizing effects of refugee communities are both underappreciated and potentially more important than the destabilizing ones. In some cases, they serve to offset potentially destabilizing ones. In others, they may serve to decrease conflict risk overall. Throughout this section, we incorporate expert interviews, which we conducted with a dozen senior officials who have held or currently hold a leadership position at the UNHCR to give additional context.\footnote{These officials have extensive experience working with refugee communities across 15 different countries, mostly in sub-Saharan Africa and the Middle East. More than half of these officials worked on refugee issues in multiple nations. Specifically, we asked them to describe how UNHCR and other actors typically operate in refugee-hosting areas, the formal or informal policies pertaining to refugee-host relations, and what they have observed in the field in terms of effects on local development and conflict. 
Please find additional discussion, including adherence with the Principles and Guidance for Human Subjects Research, and interview quotes in Section \ref{SIsec:interviews} in the Supplementary Information (SI). For privacy, we refer to them not by their names but by their position titles at the time of the interview.}

% Research on general relationship between refugees and increasing conflict likelihood
\subsection*{Literature linking refugees to conflict}

Scholars of international security have identified how countries whose neighbors experience civil conflict are themselves significantly more likely to experience civil conflict. Such regional conflict diffusion suggests that incidents of sub-state conflict are not independent events with purely domestic causes \citep{Gleditsch:2002, Braithwaite:2006, Hegre:2006, Gleditsch:2007}. \citet{Forsberg:2014, Forsberg:2016} lays out transnational dimensions of civil conflict, one of which is ``contagion,'' characterized by direct spillovers of arms, polarized ethnic groups with kinship ties, and refugee populations. Many related studies find that the likelihood of civil conflict or other forms of violence, such as terrorism or non-state actor violence, increases in countries that host more migrants, and specifically refugees, from nearby countries at war \citep{Salehyan:2006,Salehyan:2007,Salehyan:2009,Choi:2013,Milton:2013,Bove:2016,Bohmelt:2019}. Notably, \cite{Salehyan:2006} show that larger populations of refugees from neighboring countries are positively associated with greater likelihood of civil war onset in the host country.

<<maprefciv, eval=TRUE, echo = FALSE, tidy=TRUE, fig.width = 10, fig.height = 8, out.width= "1\\linewidth", fig.align='left', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("The top global map shows the yearly average number of violent events (red shading) during our panel 1990 - 2018 along with the", nrow(refsites), "refugee sites (blue points) in our data that were open at any point during this time. The map below zooms in on the area outlined by the black box in the global map. This area covers West, Central, and the Horn of Africa, where many refugee and conflict sites are concentrated. Data sources: UNHCR data on displacement locations and the UCDP Georeferenced Event Dataset (GED).")>>=

# Figure: global map of average yearly violent events by province with refugee
# sites overlaid, stacked above a zoomed-in panel of the boxed region.
# NOTE(review): SpatialPointsDataFrame, CRS, coordinates, fortify, ggplot and
# grid.arrange are not loaded in this chunk -- presumably attached by
# RefCivFunctions.R in the data-loading chunk; confirm.
setwd("Paper_Inputs")

# Turn the site tables into spatial point objects, using POINT_X / POINT_Y as
# coordinates in longitude-latitude (WGS84). Only refsites.shp is used further
# in this chunk; refcamps.shp and refsettlements.shp are presumably used by
# later chunks.
refsites.shp <- SpatialPointsDataFrame(cbind(refsites$POINT_X,
                                           refsites$POINT_Y),
                                     data = refsites,
                                     proj4string = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))

refcamps.shp <- SpatialPointsDataFrame(cbind(refcamps$POINT_X,
                                           refcamps$POINT_Y),
                                     data = refcamps,
                                     proj4string = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))

refsettlements.shp <- SpatialPointsDataFrame(cbind(refsettlements$POINT_X,
                                           refsettlements$POINT_Y),
                                     data = refsettlements,
                                     proj4string = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))

# bring in global shapefile
# (rows whose REGION is "Antarctica" are dropped; provinces.df keeps a copy of
# the attribute table before the merge below)
provinces.shp <- rgdal::readOGR("world_admin_1998.shp", verbose = FALSE)
provinces.shp <- provinces.shp[provinces.shp@data$REGION != "Antarctica",]
provinces.df <- slot(provinces.shp, "data")

# conflict range from high propensity (dark red) to low (white)
ramp <- colorRamp(c("white", "darkred"))

# mean of attacks over the years by province, full sample
# (stored as log(mean + 1), so the legend breaks further down are also given
# as log(x + 1))
attackmean <- log(tapply(geddata$attack, geddata$GMI_ADMIN, mean, na.rm = T) +1)

# colors for propensity 
# (hex colours from the ramp, scaled by the largest value; they are merged into
# the attribute table below, but the plots in this chunk fill by attackmean)
cols_attackmean <- rgb(ramp(attackmean/max(attackmean))/255)

# merge into data
# merge() may reorder rows, so the original row position is recorded in `index`
# first and the merged table is sorted back by it, keeping the attribute rows
# aligned with the polygons.
cols_attackmean_df <- data.frame(GMI_ADMIN = names(attackmean), attackmean = attackmean, cols_attackmean = cols_attackmean)
provinces.shp@data$index <- 1:nrow(provinces.shp@data)
provinces.shp@data <- merge(provinces.shp@data, cols_attackmean_df, by = "GMI_ADMIN", all.x = TRUE)
provinces.shp@data <- provinces.shp@data[order(provinces.shp@data$index),]

## GGPlot
# Convert the polygons to a vertex-level data frame keyed by `id` (the row
# names of the attribute table) and attach the attributes by that same key.
provinces.shp@data$id <- rownames(provinces.shp@data)
provinces.shp_points <- fortify(provinces.shp, region="id")
provinces.shp_df <- plyr::join(provinces.shp_points, provinces.shp@data, by="id")

# Plain long/lat data frame of the refugee sites; the constant id gives every
# point the same colour level in scale_colour_manual().
refsites.shp_df <- coordinates(refsites.shp)
refsites.shp_df <- as.data.frame(refsites.shp_df)
refsites.shp_df$id <- 1
names(refsites.shp_df) <- c("long", "lat", "id")

# Top panel: provinces shaded by attackmean, refugee sites as points, and a
# black rectangle whose bounds equal the axis limits of the bottom panel.
# The legend is switched off here; the bottom panel carries it.
GlobalMap <- ggplot() +
    geom_polygon(data = provinces.shp_df,
                 aes(long, lat, group = group, fill = attackmean),
                 color = "black", size = .2) +
    geom_point(data = refsites.shp_df, aes(long, lat, colour = as.factor(id)),
               pch = 16) +
    coord_cartesian(ylim = c(-40, 50), xlim = c(-100, 145)) + 
    ylab("Latitude") +
    xlab("Longitude") +
    scale_fill_gradient(low = "#FFFFFF", high = "#8B0000",
                        #breaks = c(0, .15, 1),
                        #labels = c("0", "3", "6"),
                        name = "Number of Violent Conflict Events") +
    ggplot2::annotate("rect", xmin = -20, xmax = 55, ymin = -10, ymax = 15, colour = "black",
             alpha = 0) + 
    scale_colour_manual(values = "blue", labels = "Refugee sites", name = "") +
    theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        ) +
    guides(fill = guide_legend(title.position = "top")) + 
    theme(legend.position = "none")

# Bottom panel: same layers restricted to the boxed region, with larger points
# and the legend at the bottom. Fill breaks are placed at log(x + 1) for
# x = 0, 100, 1000 and labelled with the untransformed values.
CentralAfrica <- ggplot() +
    geom_polygon(data = provinces.shp_df,
                 aes(long, lat, group = group, fill = attackmean),
                 color = "black", size = .2) +
    geom_point(data = refsites.shp_df, aes(long, lat, colour = as.factor(id)),
               pch = 16, cex = 2) +
    coord_cartesian(ylim = c(-10, 15), xlim = c(-20, 55)) + 
    ylab("Latitude") +
    xlab("Longitude") +
    scale_fill_gradient(low = "#FFFFFF", high = "#8B0000",
                        breaks = c(log(0 + 1), log(100 + 1), log(1000 + 1)),
                        labels = c("0", "100", "1000"),
                        name = "Average Yearly Violent Conflict Events") +
    scale_colour_manual(values = "blue", 
                        labels = "", 
                        name = "Refugee sites") + 
    theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        legend.key = element_rect(fill = NA, color = NA)
        ) +
    guides(fill = guide_legend(title.position = "top")) + 
    theme(legend.position = "bottom")

# Stack the two panels vertically (global map on top).
grid.arrange(GlobalMap, CentralAfrica, nrow = 2)

@  

At first glance, our subnational province-year panel data, which we describe in detail in the following section, exhibit a similar association. Figure~\ref{fig:maprefciv} overlays refugee sites (blue points) and the yearly average number of violent events (red shading) by province across the time span of our study, 1990--2018. Refugee sites are clearly geographically concentrated in regions in which conflict is endemic. This is unsurprising insofar as refugees are most frequently displaced due to conflict and tend to settle in neighboring countries that may also be affected by regional instability. Thus, causal inferences about the effects of refugees on conflict must address issues of potential simultaneity.\footnote{Relatedly, we note that there is a substantial body of work that considers the effects of conflict on displacement. See, for instance, \cite{Schmeidl:1997, Balcells:2016, Steele:2017}.}

% Case studies that summarize mechanisms why negative effects
% refugees as perpetrators
Although much of this literature focuses on the outbreak of conflict, it builds upon a rich body of case studies, which associate refugees with patterns of conflict, such as increased likelihood of new conflict onset or greater intensity of existing conflict. This qualitative body of work also helps identify possible mechanisms driving relationships between refugees and conflict.\footnote{Several cases feature prominently: militarized refugees such as the Banyarwanda refugees in Eastern Congo in the 1960s; refugee movements from Liberia's two civil wars from 1989 through 2003 contributing to the insecurity of neighboring countries of Sierra Leone, Guinea, and the Ivory Coast; the 1994 Rwandan refugees in the DRC as mentioned in the introduction; and members of Al-Shabaab operating within the Somali refugee camps in Kenya \citep{Matthews:1972, Zolberg:1989, Loescher:1992,Lischer:2006, Whitaker:2002, Muggah:2006}.} 
In some instances, refugee sites have served as potential recruitment areas and bases of operation for active rebel groups. For instance, \citet{haer2019recruiting} find that recruiters for combatant groups often target refugees who feel economically deprived. Refugees who are not themselves combatants can still be members of auxiliary forces, assisting with the transfer of munitions and other mobilization resources to domestic opposition groups with whom they share an ethnic or ideological affinity \citep{Zolberg:1989, Lischer:2006}. 

% refugees as victims
Nevertheless, others have argued that refugee populations consist overwhelmingly of civilian noncombatants who, because they are fleeing conflict, actively try to avoid conflict-related activities \citep{Matthews:1972,Whitaker:2002,Onoma:2013,Fabbe:2019}. Yet even when refugees are not perpetrators of violence, their presence alone may incite conflict if the local population or state-sanctioned forces attack refugees. Refugee populations may also alter demographic and social balances within host countries. In societies with a precarious balance of social cleavages or pre-existing rivalries and tensions, these changes in demographic makeup can provoke conflict \citep{Loescher:1992, Ruegger:2017}. Refugees may impose (or appear to impose) an economic burden by straining local social services and infrastructure \citep{Weiner:1992}. Host citizens may view them as competition for land, jobs, housing, marriage partners, etc., leading to violence against refugees \citep{Jacobsen:2005,Dancygier:2020}. In some cases, the provision of humanitarian assistance to refugees by international organizations can provoke resentment and increase the likelihood of conflict \citep{Jacobsen:2005}. Comparing cases from Guinea, Uganda, and the DRC, \citet{Onoma:2013} finds that even in rare cases where refugees are associated with new conflicts, refugees are the ones attacked by hosting communities at the instigation of host governments for politically motivated reasons. Similarly, \citet{Savun:2019} argue that host governments use refugees as scapegoats in the wake of terrorist attacks or other security crises. Using global panel data at the country-level, they show that during these times, refugees are likely to be attacked. \citet{Bohmelt:2019} find an association between refugees and non-state actor violence, again pointing to tensions with local communities. 

Across various contexts, there is growing public backlash against refugees by host citizens, as detailed in the recent literature on anti-refugee attitudes by host citizens \citep[e.g.][]{Dancygier:2014,Whitaker:2015,Ferwerda:2017,Adida:2018,alrababa2021attitudes,Zhou:2019}. However, as we describe below, humanitarian organizations are often cognizant of these dynamics and intentionally adjust their policies to generate positive externalities for local host communities, thereby minimizing or preempting tensions between arriving refugee communities and local citizens. Finally, the presence of refugees can pose challenges to state capacity and political legitimacy, especially given that areas where refugees tend to settle are border, peripheral areas \citep{Whitaker:2002,Onoma:2013}. 

% Expert interviews on how these cases were exceptional
While many of the officials we interviewed recalled individual cases of refugee-related conflict, such as in 1980s Pakistan and in 1990s DRC, they consistently pointed out that these cases are ``the exception, [which] becomes the focus of all the studies, rather than the norm.''\footnote{Senior Official 1 at FICSS, interview conducted on June 25, 2018.} 
They recognized that tensions can still flare between refugees and host communities: ``Certainly there are instances where the presence of refugees creates [non-violent] conflict with the local community like competition for resources, access to fire wood collection, environmental impact.''\footnote{Senior Policy Officer, interview conducted on July 23, 2018.} However, by anticipating these tensions, ``UNHCR and aid agencies try to put in place prevention mechanisms... creating informal structures, committees, where the local people like elders and chiefs would meet regularly... Every community has structures. Even the refugee community is not just some amorphous group of people but they have spokespeople, traditional leaders... We will find these people pretty fast and easily because they are our interlopers. On the side of the host communities, we also have existing institutional structures.''\footnote{Senior Official at Division of Programme Support and Management, interview conducted on September 12, 2018.}
Several officials expressed frustration that rising fears of refugees as security threats are overblown: ``Despite all the hysteria of refugees and conflict, refugees account for a small part of the population. This is an eminently manageable problem... [Large refugee populations in places like] Uganda and in Bangladesh, they have been managed without disastrous consequences by governments with much fewer resources.''\footnote{Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.}\textsuperscript{,}\footnote{For context, Uganda and Bangladesh are each hosting about 1.2 million refugees as of early 2019.} 

% Turning to positive effects on local communities / our theory
\subsection*{Accounting for the positive effects of hosting refugees}

Prompted by these cases of well-managed refugee hosting situations, we draw on recent research that examines the local economic and infrastructural effects of refugees to highlight several ways in which the presence of refugees can positively contribute to host areas. We think of such ``positive externalities'' of refugee presence as acting on conflict likelihood in two general ways. First, some externalities do not reduce the likelihood of conflict that existed prior to refugees' arrival but serve to mitigate potential increases in that likelihood following their arrival. These effectively help preserve the status quo. An example of such an externality is the provision of health services, as we describe in more detail below, to both refugees and local nationals (particularly in cases where pre-existing health services were limited). In such cases, potential tensions among refugees and locals may be reduced (or, in the case of tensions over existing resources, prevented) by the provision of such services. 

The second set of positive externalities potentially serve to reduce the likelihood of conflict (or reduce the intensity of ongoing conflict). Perhaps the clearest example of this is the development of critical infrastructure (e.g. electrification, roads) in areas of countries where state capacity had previously been significantly limited. In their study of the determinants of civil war, \cite{Fearon:2003} find that state capacity bears significantly on a country's likelihood of experiencing conflict, and we accordingly theorize that those externalities that promote state capacity are likely to increase the state's ability to prevent conflict from breaking out or to suppress ongoing violence. We theorize that taken together, these positive externalities of refugee presence largely counterbalance their negative effects, resulting in a general status quo outcome for risk of conflict. Below, we describe these dynamics in greater detail.

First, refugee sites often ``become repositories of such resources as relief supplies and food aid, vehicles, communication equipment, employment and transport contracts with relief agencies, and other locally valued and scarce materials'' \citep[p.577]{Jacobsen:2002state}. Humanitarian assistance and infrastructural development, although primarily intended for refugees, can lead to positive externalities for local host communities. Empirically, these have included greater electrification; expanded access to health care facilities; new water wells and improvements to sanitation infrastructure; and expansions in roads and bridges, which decrease transport costs for local communities \citep{Van:1998,Jacobsen:2002state,Bariagaber:2006,Tatah:2016,Maystadt:2018}. To preempt tensions, some agencies make assistance directly available to local citizens given that refugee sites are often located in already marginalized locations where local host communities are themselves impoverished \citep{Sanghi:2016}. In fact, this approach is explicitly outlined in UNHCR's Handbook for Emergencies (1999), and was described to us in interviews. As one official noted, ``Increasingly over the years, there was a real push to set things up in such a way that the local community was also benefiting from them like health clinics for example, the nearby villages [also benefit] from that facility. More recently, the shift has been to how you boost services more generally to areas where refugees are located, enhancing existing services.''\footnote{Senior Policy Officer, interview conducted on July 23, 2018.}

Second, refugees themselves often contribute physical, social, and human capital to local host economies \citep{Alix:2009,Taylor:2016,Alix:2018,Maystadt:2018}. For example, the region of Turkana, northern Kenya which hosts the Kakuma refugee camp, saw such substantial increased economic activity that \citet{Alix:2018} were able to detect increases in nighttime lights from satellite data. \citet{Sanghi:2016} describe how these positive effects on the economy, attributable to refugee-owned businesses, motivated local host citizens to not just tolerate the refugees, but welcome their presence, as the following case exemplifies: ``According to UNHCR, when there was talk about closing Kakuma in the early 2000s, there was an uproar among the host community, who saw the camp as their main source of employment, business opportunities, and commercial goods'' (p.3). Refugees can also bring foreign remittances and cash transfers, which inject foreign capital into local markets \citep{Jacobsen:2002state}. Similarly, \cite{akgunduz2018impact} and \cite{altindag2019blessing} document meaningful positive changes in both the productivity and number of businesses operating in Turkey in those areas that received large numbers of Syrian refugees. And contrary to some of the literature cited above linking humanitarian aid to refugees with joining armed groups \citep[e.g.][]{Lischer:2006,Salehyan:2009}, \citet{Lehmann:2020}, who examine the effects of cash transfers to Syrian refugees in Lebanon, find that aid reduced violence involving refugees by concurrently benefiting host citizens through refugee expenditure on local goods and services supplied by local businesses.\footnote{Nevertheless, we do not mean to suggest that the effects of refugee presence on local economic activity are uniformly positive. As with many factors that influence economic activity, there are winners and losers. 
For instance, \cite{braun2014employment} and \cite{calderon2016labour} highlight negative effects of refugee presence on employment opportunities and wage levels of native citizens. Other scholars including \cite{alix2012displaced} find heterogeneous effects of refugee presence on local economies, effecting positive changes (for instance, for property owners and certain suppliers), and negative changes for others (consumers). For more detailed reviews of such effects, see \cite{maystadt2019impacts} and \cite{ruiz2013economics}.}

Third, the presence of refugees can present opportunities for the state to develop capacity in more marginalized areas \citep{Herbst:2000}. Since refugee movements are often highly visible phenomena with international attention, ``they therefore represent political leverage for savvy actors, including the state itself'' to channel resources for state building \citep[p.578]{Jacobsen:2002state}. International donor agencies could contribute training and equipment to state security forces and local police. For example, when western Tanzania hosted large refugee populations in the mid-1990s, the state increased its bureaucratic presence in this border region \citep{Landau:2008}. In some cases, the UNHCR will also directly assist state governments with the costs associated with enhancing security in areas that receive refugees.\footnote{Senior Official 1 at FICSS, interview conducted on June 25, 2018.}

The officials we interviewed described numerous instances in which the presence of refugee communities improved local state capacity, access to electricity, roads, health care, and education. ``Very often these populations have accumulated in border areas which are often the least developed parts of the country. So while the arrival of refugees evokes a security reflex, those in charge of refugees in the government... tend to attract financing even through humanitarian channels into areas that generally did not have that interest in that part of the country. You have external financing that leverages government fiscal expenditures.''\footnote{Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.} Others cited cases in Sri Lanka, Colombia, and Eastern Chad where they observed increased road networks as a response to the settlement of refugees. They also discussed how refugees themselves help revitalize local economies. Referencing again our motivating example of Kakuma camp in Kenya, ``There is a vibrant economy here, not only with humanitarian agencies. The refugees contribute to 3\% economic growth every year in Kakuma... They have driving schools, hardware stores, cinemas, restaurants, hotels... [the refugees] are an economic engine.''\footnote{Senior Official at Kakuma Refugee Camp, interview conducted on July 23, 2018.}

% Mixed effects, resummarize theory
Taken together, we recognize that there is often a mix of negative and positive effects of refugee presence on host communities, which themselves are not homogeneous. For example, scholars find that host citizens in urban areas, informal laborers, consumers, and the poorest are at a greater disadvantage, due to higher market prices, higher rents, and wage competition \citep{Chambers:1986,Cortes:2004,Alix:2009,Maystadt:2014,Tumen:2016}. Refugee-hosting policies, such as whether refugees have the right to move and work, could also attenuate positive economic effects. Ultimately, we argue that on average, these positive and negative effects balance out, and refugees and host communities simply co-exist. Even when relations are strained and refugees are viewed as a threat, \citet{DRC:2018} and \citet{Getmansky:2018} find that host citizens are still wary of violence and do not change their attitudes in favor of peace. 

% hypotheses
\subsection*{Hypotheses}
\label{sec:hypotheses}

We first test the following main hypothesis that is prevalent in the literature:

\begin{quote}
\setstretch{1}
\textbf{Hypothesis 1 (H1)}: Areas hosting refugee communities \textit{experience more conflict} compared to areas without refugees.
\end{quote}

We do not find empirical support for this hypothesis. Next, we further unpack this null effect by examining when beneficial effects of hosting can not only offset but \textit{outweigh} the potentially destabilizing effects. We argue that when state actors and humanitarian aid agencies efficiently allocate resources and services, such as when refugee communities are geographically concentrated to one region, these cases have the greatest chance to experience the positive effects on local development, and consequently, conflict risk reduction. One senior official reflected, ``On the distribution of humanitarian resources, I suppose the main observation that in terms of when \textit{people are concentrated}, the tendency is to invest in services and facilities that directly serve that population. The way that things were traditionally done, ...the aid would be much more focused on the refugee population.''\footnote{Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.}
In line with this expectation, we predict that,

\begin{quote}
\setstretch{1}
\textbf{Hypothesis 2 (H2)}: Areas hosting refugee sites, conditional on there being no other refugee hosting provinces in the same country-year (i.e. geographically concentrated), can experience \textit{reductions in conflict}.
\end{quote}

\setstretch{1.8}
\noindent To directly test the mechanism of increased local development that we detailed in the previous section, we also predict that,

\begin{quote}
\setstretch{1}
\textbf{Hypothesis 3 (H3)}: Provinces hosting refugee sites, conditional on there being no other refugee hosting provinces in the same country-year (i.e. geographically concentrated), experience \textit{greater development}.
\end{quote}

\setstretch{1.8}

Following our expectation that returns to development and stability will be greatest in cases where refugee communities are concentrated, we consider a set of more specific hypotheses whose results should offer specific evidence as to the set of conditions under which refugee communities are more or less likely to be associated with increased conflict risk.

First, the benefits of local development and stability should require time to manifest. As one UNHCR official described, with the arrival of refugees, ``you have a sudden surge in demand for services. It could be a real challenge for local government. But [over time]... you do have people with capital and skills that can contribute positively to the economy. And employers can find a ready supply of laborers at lower wages. The first 6 months a new equilibrium imposes itself... So it's not altogether surprising that you go through the trajectory of at first shock, but then a period of stabilization, and even a changed environment.''\footnote{Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.} 
Thus, we expect positive benefits of refugee concentration to emerge only after such an initial transitional period. In contrast, conflict likelihood during the initial months of refugee arrival may be elevated.

Second, when refugee communities are not only concentrated but very large (relative to local communities), there may be more opportunity for increases in local economic activity and greater incentives for infrastructural development and other focused investments by the UNHCR and state governments. Yet, in large numbers, refugee communities may be more likely to alter ethnic balances and more drastically affect local economies (again, with potential winners and losers within the host communities) \citep[e.g.][]{Johnson:2011,Tumen:2016}. Examining refugee populations at the subnational level in Africa, \citet{Fisk:2014} finds no relationship between large camp-based refugee populations and conflict. Thus, our expectations are mixed. If we find that large numbers of refugees are associated with reduced conflict risk, this would support our claim that the stabilizing effects under such conditions can outweigh possible negative externalities. 

Third, we predict that refugee communities that are concentrated and located in remote, underdeveloped areas of countries will experience decreased conflict risk by attracting investments in infrastructure in the areas where the marginal benefits are likely to be the greatest. Finally, we expect humanitarian aid agencies and state actors to be more likely to operate in formal camps as opposed to informal settlements, given that they generally have more control over movement and programming in camps. We conduct a series of additional heterogeneous effects analyses to further explore under which conditions refugee-hosting areas would be more likely to see additional gains in development and reductions in conflict,

\begin{quote}
\setstretch{1}
\textbf{Hypothesis 4 (H4)}: For these geographically concentrated cases, the negative effects on conflict and positive effects on development are greater for areas hosting a) \textit{older} and b) \textit{larger} refugee sites; c) formal refugee \textit{camps}; d) and areas \textit{near the border}.
\end{quote}


\section*{Data and Methods}
\label{sec:data}

% DESCRIBE DATA 
Our unit of analysis is the province-year from 1990--2018. We construct a global panel dataset of \numprint{\Sexpr{length(unique(geddata$GMI_ADMIN))}} \textit{provinces}, formally, first-order administrative units, using the ESRI map of global provinces in 1998.\footnote{Neither ESRI nor any other source with which we are familiar has constructed first-level administrative boundary maps on a yearly basis that we might otherwise use for this study.}\textsuperscript{,}\footnote{We use provinces instead of grid squares, such as PRIO-GRID, as our unit of analysis for two reasons. The first issue concerns the distribution of grids across countries. In many cases, grid squares are intersected by country borders. For our purposes, this is particularly problematic given the spatial distribution of refugee sites; roughly 27\% of refugee sites are within 100km of the border. Grid squares are, therefore, susceptible to capturing refugee communities on one side of an international boundary and conflict events on the other side. In such cases, the presence of refugee communities and incidence of conflict would be incorrectly positively associated. Secondly, some of our refugee site location data lack exact latitude/longitude coordinates. In these cases, our team of research assistants was able to match their locations to provinces based on UNHCR reports. This would not be possible for grid squares.}\textsuperscript{,}\footnote{We also exclude non-relevant provinces (e.g. Antarctica, Greenland, United States Minor Outlying Islands), and small island states not included in cShapes country boundaries, described in more detail later, or attached to larger relevant countries (e.g. the United States).} 
To do this, for each year of the study period, we match the centroid of every province to a time-varying country boundary dataset constructed by \cite{weidmann2010geography}.\footnote{This process matches most provinces to countries; however, in some cases, centroids fall outside country boundaries. In these cases, we correct the mismatches by using a nearest distance function that maps the unmatched provinces to the country to which they belong. We also note that countries typically experience boundary changes on days other than the first day of the year (Jan 1). Given that our analysis is at the year level, we, therefore, assign provinces in the year of a change to one country or the other for that year (e.g. a province in Sudan that later becomes part of South Sudan). We note that such cases are rare. This process does not create a perfect time-varying province level map of the world as, in some cases, the size and/or boundaries of individual provinces themselves may have changed. However, we are unfamiliar with any time-varying global sub-national dataset that is as detailed as the one we have constructed for this project.}
While this comprehensive province-panel (\textit{Full Data}) spans \textit{every country}, we recognize that not all non-refugee hosting provinces may be appropriate counterfactuals. Since we aim to compare provinces that host refugee sites (treatment group) with provinces that could plausibly host refugee sites (control group), it may not be appropriate to include provinces in countries like Japan or Luxembourg that are unlikely to ever host refugee sites, given that they have never done so.\footnote{Here, we also make the distinction between refugee sites, asylum areas where refugees are staying, and \textit{third-country refugee resettlement}, where another state, such as Canada or the U.S., has agreed to grant permanent settlement to former refugees after a selection process based on government quotas and other criteria. To give a sense of scale, in 2017, less than one percent of all refugees were resettled. Please see the UNHCR page on resettlement for more details: \url{https://www.unhcr.org/resettlement.html}. Where these former refugees are resettled is also not included in our data of displacement locations.} Thus, the data we use for the main analysis in this paper we call the \textit{Main Data}, which only includes provinces in the \Sexpr{length(unique(geddatasub$Country))} countries that have at some point in our panel hosted a refugee site, subsetting out the provinces in \Sexpr{sum(tapply(geddata$rtb, geddata$Country, sum) == 0)} never-hosting countries.\footnote{As a robustness check, we rerun the analyses using the full data of all countries, see SI Section \ref{SIsec:Full}. Results do not substantively change.} We are left with \numprint{\Sexpr{length(unique(geddatasub$GMI_ADMIN))}} provinces and \numprint{\Sexpr{nrow(geddatasub)}} province-year observations. Section \ref{SIsec:summary} in the SI provides summary and descriptive statistics of our data.

% REFUGEE SITES
\subsection*{Explanatory Variables: Presence of Refugee Sites (Camps and Settlements)}

We use new displacement locations data from the UNHCR Field Information and Coordination Section (FICSS). These data encompass the universe of \numprint{\Sexpr{nrow(allsites)}} refugee and IDP sites open during our study period. To construct these data, UNHCR country teams and field staff report the location of any known displaced communities to FICSS. Thus, we are able to indicate within any province in a given year whether a community of refugees or IDPs had settled there. These data include latitude/longitude coordinates, creation date, closing date (if closed), displacement type -- refugee or internally displaced -- and whether the site is a formal camp or informal settlement.\footnote{In approximately 4\% of locations, geographic coordinates were missing. Most of these cases are IDP settlements in Central African Republic and Nigeria. For the eleven cases of refugee camps with missing coordinates, working with research assistants, we manually added these coordinates to the dataset.} According to FICSS, the distinction in these data between camp and settlement is largely determined by the host-country. In general, camps receive humanitarian assistance, UNHCR presence, and host-country oversight whereas settlements are typically informal communities. 

We construct a binary indicator {\tt RefugeePresence} for whether a province-year was hosting at least one refugee site. Again, we operationalize \textit{refugee sites} as encompassing all \textit{formal refugee camps} as well as \textit{informal refugee settlements}.\footnote{
In 18 cases, the geographic coordinates provided to the refugee sites were matched to the wrong country, because such locations were in very close proximity to international borders. We manually correct these cases by using the country names included in the UNHCR dataset to determine the actual country of settlement. In a relatively small but substantial number of cases, observations were missing open and close dates. In most cases, with the help of research assistants, we manually corrected these dates for refugee camps and settlements. When we could not identify the exact open and close years, we instead identified the earliest date (going as far back as the first year of the study 1990) during which at least one site was open in the entire province. We also determined the last year on which at least one site was open (going as far forward as the last year of the study 2018). This way, for each affected province-year observation, we could construct the relevant variables without knowing the open and close dates of all relevant camps and settlements.}  
Table \ref{tab:refsitesnumregion} in the SI shows, by region, the number of refugee sites broken down by camps and settlements. Most refugee-hosting provinces only have one site in any given year (\numprint{\Sexpr{nrow(geddatasub[geddatasub$rt == 1,])}} province-year observations), while \numprint{\Sexpr{nrow(geddatasub[geddatasub$rt > 1,])}} province-years host more than one. However, because we do not have information on the population sizes of these sites, we do not believe using number of sites is preferable to an indicator of presence of at least one site. Table \ref{tab:refsitesnumprovyr} in the SI shows the number of refugee sites by number of province-year observations. There are several outlier province-years with over one hundred sites listed. Because UNHCR country offices classify sites differently for their own logistical purposes, the number of listed sites is not readily comparable across countries. 
Although the UNHCR displacement location dataset includes scattered cases of individual refugees, we do not include them in our analyses precisely because these are small numbers of individuals or households, often living in urban areas. To be clear, if there are refugee camps or settlements within a city, we count these locations.\footnote{For example, in the mid-2000s approximately 102,000 refugees from Angola and the Great Lakes region resided in five camps in the capital of Zambia, Lusaka. Please see this UNHCR report for more details: \url{https://www.unhcr.org/4371d1ae0.pdf}.} But we exclude cities where \textit{dispersed individual refugees} are reported.\footnote{For example, a few hundred refugees from Iraq, Liberia, Nigeria, and Somalia tracked by the UNHCR live dispersed across seven major Chinese cities, including Beijing and Shanghai. Please see this UNHCR report for more details: \url{http://www.unhcr.org/5000187d9.pdf}.}

It is important to note that the majority of our analyses can only speak to the presence versus absence of refugee communities and not the sizes of these locations. Unfortunately, for our study period at the global level, the UNHCR did not collect information on population, demographic or ethnic composition, reason for displacement, nationality of displaced persons, levels of aid or involvement by UNHCR or other humanitarian organizations. Nevertheless, for a subset of our data covering  African countries from 2010--2015, we have information on the total number of refugees in each site, which we aggregate to the province-year level. We discuss this population subset data and associated analyses below in greater detail.

To account for the influence of refugee sites outside a given province but still within the same country, we also construct {\tt Refugee Presence in Other Provinces}, which indicates whether there exists at least one other province in the same country-year hosting refugee sites. For our main analysis with respect to H1 on the effect of refugee sites on their host province, we include this variable as a control variable, because having refugee sites in other areas of the country may affect both the likelihood of conflict and refugee settlement in our province of concern. For our secondary analysis with respect to H2 on how the concentrated or dispersed geographic distribution of refugee sites within a country might affect conflict in a given province, our treatment variable is the interaction between {\tt RefugeePresence} and {\tt Refugee Presence in Other Provinces}. Here, we are interested in the conditional effect of refugee sites on their host province depending on whether there are other refugee-hosting provinces in the same country-year (i.e., dispersed refugee presence) or not (i.e., concentrated refugee presence). Finally, for the heterogeneous effects analysis in which we differentiate between types of refugee site, formal camp versus informal settlement, we use the binary variables {\tt RefugeeCamp} and {\tt RefugeeSettlement}. 


% CONFLICT 
\subsection*{Main Outcome Variables of Conflict} 

We generate four separate measures of conflict -- onset, incidence (that is, the continuation of civil war across years), violent events (attacks), and battle deaths. We adopt these separate measures because scholarship linking refugees to political violence has not focused exclusively on any one of these outcomes. Collectively, they describe ways in which refugees can destabilize the countries within which they settle with implications for the onset, continuation, and intensity of conflict.

First, {\tt Conflict Onset} and {\tt Conflict Incidence} are binary indicators of whether a province-year experiences new conflict or the continuation of conflict. To construct these measures, we use PRIO's conflict site dataset, which covers the period 1990--2008. That dataset provides the estimated center points (in latitude and longitude coordinates) of the ``area directly affected by conflict'' \citep[p.2]{Dittrich:2012} for each year the conflicts are running as well as the radius for each conflict-year.\footnote{The dataset consists almost exclusively of sub-state conflicts but includes a very small number of cases of interstate war, which we include when assigning conflict values.}\textsuperscript{,}\footnote{Two percent of cases in the conflict circles dataset do not contain center points, and these are dropped from our analysis.} The radius for each conflict-year represents the estimated distance to the boundary of fighting furthest from the center. We use these measures to construct individual conflict-year circle shapefiles.\footnote{In some cases, the dataset identifies multiple ongoing conflicts in a given country in a given year. In such cases, we construct conflict circles for each conflict and use those to identify affected provinces.} 

For each year of the study period, we then intersect these conflict circles with all provinces of the conflict-affected country, identifying those that experienced conflict on their soil and those that did not to generate a province-year binary civil war indicator. Under this approach, provinces that only partially intersect with a conflict circle are considered as having experienced civil conflict. We are careful to ensure that conflict circles are matched only with affected countries so that neighboring country provinces are not erroneously assigned positive conflict values.\footnote{Finally, we note that \citet{Dittrich:2012} includes a very small number of cases (four) of interstate war in their dataset. We include these conflicts when assigning conflict values.} This variable captures the {\tt Conflict Incidence} of civil war. To construct a measure of {\tt Conflict Onset}, we assign a value of 1 to province-year observations in which civil war occurs when it had not in the previous year. For observations where civil war continued from a year previous, we assign a value of {\tt NA} (rather than {\tt 0}) so that years in which civil war is ongoing are not treated as the comparison for civil war onset. Figure \ref{fig:Conflict_time_sub} in the SI plots the proportion of these two variables by year, showing that neither measure of conflict is a rare event requiring models that correct for small-sample bias. As an alternative method of constructing these two outcome measures, we also use wzoneData Conflict Polygons, which cover the full study period from 1990 through 2018 \citep{Kikuta:2020}.\footnote{The conflict polygons from wzoneData were created by \citet{Kikuta:2020} from UCDP Georeferenced Event Dataset (GED) 19.1 conflict events data \citep{Sundberg:2013,Pettersson:2020}, using a machine learning method. 
Note that this version of wzoneData excludes conflict events from the Syrian Civil War, activity by the Islamic State, and the US War in Afghanistan and Iraq.}

Next, to construct measures of conflict \textit{intensity}, we use the UCDP Georeferenced Event Dataset (GED) 19.1 with supplemental data for the Syrian Civil War \citep{Sundberg:2013,Pettersson:2020}, which provides geographic points for each violent event and associated count of battle deaths. Using the longitude and latitude coordinates associated with each event in this dataset, we match these events to the province within which they take place.\footnote{As with the process of matching province centroids to individual country boundaries, a small number of latitude-longitude pairs do not match with a unique province. This almost invariably occurs where violent events took place in coastal regions so close to a given country's border that this matching process places them outside of the country's national boundaries. In these cases, we again use a nearest distance function to assign the unmatched conflict events to the relevant province.} We then create two measures of intensity: a count of the number of {\tt Violent Events} and the number of logged {\tt Battle Deaths}\footnote{Because of the spike in battle deaths in 1994 due to the Rwandan genocide, we log this outcome in our analyses.} that occurred in a given province-year. Figure \ref{fig:Conflict_time_sub} in the SI also plots these two outcomes over time. Lastly, as a robustness check, we calculate the number of violent events and battle deaths broken down by event type as defined by \citet{Sundberg:2013}: state based; non-state; and one-sided (see SI Section \ref{SIsec:conflicttypes} for more details).

% NIGHTTIME LIGHTS
\subsection*{Auxiliary Outcome Variable: Nighttime Lights} 

We use global satellite data of nighttime lights to test our proposed mechanism of increased development in cases of geographically concentrated refugee presence. We believe this is the best measure to capture local economic activity and development globally, but still at a subnational level. Various research efforts have validated nighttime lights as a ``good proxy for human development at the local level'' in precisely the type of less developed areas of the world that we are most focused on in our project \citep[p.1]{bruederle2018nighttime}. Three separate research projects that focus on light emissions, comparing them with the results of local survey data, independently arrive at this same finding \citep{bruederle2018nighttime, michalopoulos2013pre, weidmann2017using}.\footnote{There are, of course, inferential limitations with nighttime light emissions, which are reviewed in detail by \cite{michalopoulos2018spatial}.} Thus, nighttime lights, unlike other public goods like number or quality of health centers and public schools for example, can measure highly localized changes in human activity, for all regions of the world (in an immediately comparable way), and for every year of our study period. Our analysis is also similar to \citet{Alix:2018}, who use nighttime lights to study changes in economic activity around refugee camps in northern Kenya. 

Our measure of {\tt Average Nighttime Lights} comes from PRIO-GRID \citep{Tollefsen:2012}, and it covers the years 1992--2012.\footnote{Specifically, PRIO-GRID measures average nighttime light emission from the DMSP-OLS Nighttime Lights Time Series Version 4 (Average Visible, Stable Lights, and Cloud Free Coverages) \citep{Lights:2011}. They use the data gathered from the newest satellites (F10 in 1992-93, F12 in 1994-1996, and so on).} It is calibrated ``to account for intersatellite differences and interannual sensor decay using calibration values from \citet{Elvidge:2014}.'' Values are standardized to be between 0 and 100, where 100 is the highest observed value in the global time-series, and 0 is the lowest. 


% CONFOUNDERS, very briefly
\subsection*{Confounders}

We control for the following confounders. We include a one-year lagged dependent variable to account for past outcomes. Next, following the conflict diffusion literature, we construct {\tt Neighboring Violent Events}, a count of all violent events in neighboring provinces regardless of whether these provinces are domestic or foreign. We also take into account demographic and economic data. Specifically, we include logged {\tt Population}\footnote{Data on province-level population were similarly extracted from PRIO-GRID, which derives its grid-level population measures from \cite{ciesin2004wri}, which is produced by the U.S. National Aeronautics and Space Administration (NASA) Socioeconomic Data and Applications Center. These data cover the years 1990, 1995, 2000, and 2005. The CIESIN effort consists of combining these data with particular models to estimate population levels at a sub-national grid level.} that is lagged by one year. We also include {\tt GDP} in U.S. dollars at purchasing power parity \citep{nordhaus2006geography}.\footnote{These data also include the years 1990, 1995, 2000, and 2005.} We construct this by summing over the cells inside each province, also lagged by one year. Both variables are extracted from PRIO-GRID \citep{Tollefsen:2012}. Not all years have data for these two indicators; for missing observations, we linearly impute values between the years in a given province for which we do have data.\footnote{We begin with the grid-level measures produced by PRIO-GRID \citep{Tollefsen:2012}. We then match each grid to the provinces used in our study and then sum the population and GDP measures for all grids associated with each province. In rare cases where only a single year of data is included for a given province, we use that value alone.}

Scholars have also linked conflict to {\tt Terrain Ruggedness} \citep[e.g.][]{Fearon:2003}, and considerations of terrain also affect refugee settlement \citep{Jacobsen:1996, Jacobson:1996, Bariagaber:2006}. This variable denotes the standard deviation in the absolute elevation change of every one-kilometer grid relative to all contiguous one-kilometer grids within each province \citep{Shaver:2016}. We also include {\tt Province Size} (square km) to control for the size of the province. When constructing these data, \citet{Shaver:2016} use province shapefiles from 1998; therefore, terrain ruggedness and province size vary by province but do not change over time in our data. Next, we include {\tt Border Distance}, the logged distance (km) between the province's centroid to the international border. We do so because refugees tend to settle near international borders and such areas are often lacking in state capacity and could be more susceptible to conflict diffusion from neighboring countries. We also include {\tt Capital Distance}, the logged distance (km) between the province's centroid to the country's capital, because capitals are typically centers of state capacity, which also can affect refugee settlement and conflict propensity \citep{Braithwaite:2010}. These variables are also time invariant. 

The UNHCR data of displacement sites also indicate where internally displaced persons (IDPs) settle. \citet{Bohnet:2018} find that internal displacement is associated with domestic conflict diffusion, and the locations of refugee and IDP sites might also correlate. We thus include as an additional control {\tt IDP Presence}, a binary indicator of whether a province-year hosts one or more IDP sites, which includes both formal IDP camps and IDPs living outside of camps in informal settlements. Finally, we include {\tt Country} and {\tt Year fixed effects}. Unfortunately, we are not able to include control variables for relations between sending and host countries because the UNHCR data do not include demographic information on the refugees such as their origin countries.\footnote{The ethnic makeup in an area may also affect both refugee settlement as well as conflict. Particularly within sub-Saharan Africa, where international borders were drawn up by colonial authorities with little consideration for existing groups \citep[e.g.][]{Asiwaju:1985,Davidson:1993,Young:2001,Englebert:2002a}, it is likely that refugees settle in areas where they share ethnic ties with some citizen groups \citep{Michalopoulos:2013}. Refugees could shift the ethnic composition and balance of these host communities, possibly increasing the likelihood of conflict \citep{Loescher:1992,Forsberg:2014,Ruegger:2017}. To account for ethnic diversity, we incorporate the number of {\tt Excluded ethnic groups} from the GeoEthnic Power Relations (GeoEPR) data, which covers 1990 to 2013 \citep{Wucherpfennig:2011,Vogt:2015}. However, given the years of coverage and missingness, including this variable cuts our data from \numprint{\Sexpr{nrow(geddatasub)}} to \numprint{\Sexpr{nrow(geddatasub[is.na(geddatasub$excluded_mean) == F,])}} observations. Thus, our main analyses do not include this variable, but when we do, results do not substantively change.}

% POPULATION SUBSET
\subsection*{Subset with Refugee Population Data: African Countries from 2010--2015}
\label{sec:popdata}

Finally, for a subset of the data, we have information from the UNHCR on refugee populations, specifically the total number of refugees per site, which we aggregate to the province-year level. We have this data for African countries from 2010 to 2015. Thus, for the heterogeneous effects analysis based on population size, we only examine the violent events and logged battle death outcomes as they span this time period. We conduct two analyses using this subset refugee population data. First, for H1, we regress conflict outcomes, violent events and logged battle deaths, on logged refugee population and separately, the ratio of refugee population to local province population to examine the effect of (relative) population on conflict. 
Second, we create a binary variable for heterogeneous effects analysis based on the size of refugee presence in a province-year in order to test H4. We designate refugee-hosting provinces that have a 5\% ratio or greater of refugee population compared to local population as ``large'' and hosting provinces under 5\% as ``small.''

\subsection*{Estimation}
Formally, we estimate the following model using logistic regression for the binary {\tt Conflict Onset} and {\tt Conflict Incidence} outcomes, and OLS for {\tt Violent Events}, logged {\tt Battle Deaths} and {\tt Average Nighttime Lights}:

\setstretch{1.3}
\begin{small}
\begin{equation*}
\begin{split}
Y_{i,t} \mid X_{i,t}  = & \ f(\beta_{0} + \beta_{1}\mathit{RefugeePresence}_{i,t} + \beta_{2}\mathit{Refugee\ Presence\ in\ Other\ Provinces}_{i,t} + \beta_{3}Y_{i,t-1} \\
& \quad + \beta_{4}\mathit{NeighboringViolentEvents}_{i,t} + \beta_{5}\mathit{Population}_{i,t-1} + \beta_{6}\mathit{GDP}_{i,t-1} \\
& \quad + \beta_{7}\mathit{TerrainRuggedness}_{i} + \beta_{8}\mathit{ProvinceSize}_{i} + \beta_{9}\mathit{BorderDistance}_{i}\\
& \quad + \beta_{10}\mathit{CapitalDistance}_{i} + \beta_{11}\mathit{IDPPresence}_{i,t} + \gamma_{c} + \lambda_{t} + \epsilon_{i,t} )
\end{split}
\end{equation*}
\end{small}
\setstretch{2}

\noindent in which $i$ denotes provinces, $t$ denotes year, and $c$ denotes countries. $f(\cdot)$ is the inverse logit function for the binary outcomes and the identity function for the outcomes estimated with OLS. $Y_{i,t}$ is the binary outcome for conflict onset or incidence, or the continuous outcome for violent events, logged battle deaths, or average nighttime lights; $\mathit{RefugeePresence}_{i,t}$ is the binary treatment indicator for whether the province-year is hosting refugee sites; the remaining covariates are the confounder variables described above; $\gamma_{c}$ denotes country fixed effects; $\lambda_{t}$ denotes year fixed effects; and $\epsilon_{i,t}$ is the province-year error term. 

For our secondary analysis of concentrated or dispersed refugee settlement patterns in which we examine the conditional effect of having refugee sites in other provinces in the same country-year, we run the same models as above except that we use the interaction term $\mathit{RefugeePresence}_{i,t} \times \mathit{Refugee\ Presence\ in\ Other\ Provinces}_{i,t}$ as our treatment variable. For additional heterogeneous effects, we further interact this term with our heterogeneous binary variable of interest.

\subsection*{Refugee Location Selection, Placebo Tests, and Matching}

A major obstacle inherent to research on migrant-host relations is possible selection bias introduced by where migrants are located vis-\`{a}-vis host citizens. Voluntary migrants can choose to live in neighborhoods that are more accepting of them and/or whose residents share cultural and ethnic ties \citep{Massey:1987, Massey:1988}. Unlike voluntary migrants, however, refugees often have considerably less agency and time to choose where they settle since they are ``forcibly displaced.'' Nevertheless, we use a placebo test strategy to directly address selection bias concerns with the location of refugee sites. 

How are locations for refugee sites chosen? First, no standard international procedure exists to regulate how refugees are distributed within a host country or where refugee sites should be located. Migration research and qualitative interviews with refugee officials identify the key factors that determine where refugee sites are located, all of which we control for in our analyses. Patterns of refugee movements and settlements are foremost determined by the exogenous shocks of conflict and other exigent crises in neighboring countries. Within the host country, refugee sites are generally situated in areas where a quorum of refugees congregate based on terrain and proximity to the border \citep{Jacobsen:1996, Jacobson:1996, Bariagaber:2006}. Once settled, host governments are bound by international humanitarian law to the principle of non-refoulement -- the obligation to not send refugees away (Article 33, The 1951 Convention Relating to the Status of Refugees). 

Interviews with UNHCR officials confirm that while some refugee camps, such as those in Turkey, are planned and controlled by the state, generally ``refugee sites are almost always established \emph{ad hoc} or unplanned.''\footnote{Senior Official 1 at FICSS, interview conducted on December 30, 2013.} ``Refugee locations are more by default than by design. When you have large numbers of people arriving in border areas where the government has limited resources, they settle where they can find a piece or land or water.''\footnote{Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.}
Thus, we consider it highly unlikely that refugees are typically aware of local social and political dynamics or have the resources to carefully determine optimal settlement locations. Instead, refugees tend to settle on an \emph{ad hoc} basis. Humanitarian organizations like the UNHCR and state security forces then often respond by deploying resources to these sites as they are being established or sometime thereafter.

We control for the variables that researchers and experts believe affect refugee settlement and conflict likelihood. Nevertheless, there may still be unobserved factors inherent to the location of the refugee sites that also affect conflict. To preclude the possibility of unobserved confounders, we use placebo tests to show that there is no relationship between past conflict and whether a province-year will host refugee sites in the future. In short, the objective behind placebo tests is to confirm that an effect does not exist when it should not exist, because if it does then factors other than the proposed independent variable are driving the effect, revealing selection bias. Similar to how, among many others, \citet{Lee:2001} shows that future electoral outcomes do not affect past elections and \citet{Rothstein:2010} shows that future teachers cannot affect students' past performance, this same logic applies with our placebo strategy; our tests aim to show that the presence of future refugee sites has no effect on past conflict outcomes. And since these areas eventually do host refugee sites in the future, they are good candidates for counterfactual refugee-hosting areas in the present; they embody the selection characteristics (e.g. geographic, sociological, legal, etc.) that would determine where a potential site could be located. 

To run our placebo tests, we construct indicators for {\tt Placebo Refugee Presence}, which is coded as a {\tt 1} for province-years for which the province will eventually host at least one refugee site, {\tt 0} for province-years of provinces that will never host a refugee site (even though they may belong to countries that host refugee sites), and {\tt NA} for the year before a province begins to host refugee sites (to be conservative in case the exact date of the refugee site creation is an estimate, especially for informal refugee settlements) and then all the subsequent province-years once a province starts to host a refugee site, thus dropping true treated observations.\footnote{We do the same for {\tt Placebo Refugee Camps} and {\tt Placebo Refugee Settlements} for heterogeneous effects analysis.} Since these placebo observations do not yet host refugees, there should be no effect of these sites on the outcomes, which is what we find. This suggests that our analysis controls for the correct confounders. 

As an alternative causal identification strategy, we use a matching method for causal identification with time-series cross-section data by \citet{Imai:2018}. In this method, each treated observation is matched with a set of control observations that share identical treatment history up to three years (default). Then this set of matched controls is further refined by adjusting for covariates. Finally, we simply calculate the difference-in-differences estimator in order to account for an underlying time trend. To do this, we use the R package \texttt{PanelMatch} \citep{Imai:2018package}.

\section*{Results}
\label{sec:results}

\subsection*{H1: Effect of Refugee Presence on Conflict}

How does the presence of refugee sites affect conflict? Table \ref{tab:MainModelsH1} shows that across the model specifications, there are no effects of refugee presence on any conflict outcomes -- onset and incidence (1990--2008), violent events and battle deaths (1990--2018), confirming our expectations for H1. There even seems to be a negative effect for conflict onset, a finding we explore in greater detail in H2. The effects of placebo refugee presence are also null, as expected, which allows us to rule out the possibility that a negative correlation is driven by refugee selection into areas that \emph{ex ante} were less conflict prone. We briefly discuss the effects associated with the control variables, holding all other variables constant. First, the relationship between conflict outcomes and the presence of refugee sites in another province is consistently negative and statistically significant. This binary variable takes on the same value for all or almost all the provinces in a country-year. For a country-year with no refugee sites, all provinces have a value of 0; for a country-year with multiple provinces hosting refugee sites, all provinces have a value of 1; and for a country-year with only one province hosting refugee sites, every other province has a value of 1 except for that province, which has a value of 0.


% REGRESSION TABLE FOR H1
<<MainOnsetModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# H1 -- conflict onset (binary outcome, logit link).
#
# Author note carried over from the original chunk: in case of perfect
# separation (too many countries with no conflict in the full dataset),
# uninformative priors are chosen based on the stan-dev Prior Choice wiki:
# https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations
# The explicit prior settings (prior.scale = 1, prior.df = 3) are currently
# switched off in both calls below.
#
# Terms currently switched off in both formulas: excluded_mean, log(bdist3).
# Both models only use observations with year < 2009.

## Model with actual refugee presence (rtb), subset data
onset.h1.sub <- bayesglm(
  onset.n ~ rtb + rtb.other + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  family = binomial(link = "logit"),
  data = geddatasub[geddatasub$year < 2009, ]
)

# summary(onset.h1.sub)

## Model with placebo refugee presence (rtb.placebo), placebo data
onset.h1.sub.pla <- bayesglm(
  onset.n ~ rtb.placebo + rtb.other + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  family = binomial(link = "logit"),
  data = plagedrtbsub[plagedrtbsub$year < 2009, ]
)

# summary(onset.h1.sub.pla)

## Predicted probabilities via pred.bi() (helper defined outside this chunk),
## each tagged with the kind of estimate it represents
onset.h1.sub.dplot <- pred.bi(onset.h1.sub)
onset.h1.sub.dplot[["Group"]] <- "Actual presence"

onset.h1.sub.pla.dplot <- pred.bi(onset.h1.sub.pla)
onset.h1.sub.pla.dplot[["Group"]] <- "Placebo presence"

## Stack actual and placebo estimates into one object for ggplot
plot.onset.h1.sub <- rbind(onset.h1.sub.dplot, onset.h1.sub.pla.dplot)

## Optional caching of the fitted objects as Rdata files (disabled)
# save(onset.h1.sub, file = "onset.h1.sub.Rdata")
# save(onset.h1.sub.pla, file = "onset.h1.sub.pla.Rdata")
# save(plot.onset.h1.sub, file = "plot.onset.h1.sub.Rdata")

@

<<MainIncidenceModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# H1 -- conflict incidence (binary outcome, logit link).
# Terms currently switched off in both formulas: excluded_mean, log(bdist3).
# The explicit prior settings (prior.scale = 1, prior.df = 3) are switched off.
# Both models only use observations with year < 2009.

## Model with actual refugee presence (rtb), subset data
incidence.h1.sub <- bayesglm(
  incidence ~ rtb + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  family = binomial(link = "logit"),
  data = geddatasub[geddatasub$year < 2009, ]
)

# summary(incidence.h1.sub)

## Model with placebo refugee presence (rtb.placebo), placebo data
incidence.h1.sub.pla <- bayesglm(
  incidence ~ rtb.placebo + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  family = binomial(link = "logit"),
  data = plagedrtbsub[plagedrtbsub$year < 2009, ]
)

# summary(incidence.h1.sub.pla)

## Predicted probabilities via pred.bi() (helper defined outside this chunk),
## each tagged with the kind of estimate it represents
incidence.h1.sub.dplot <- pred.bi(incidence.h1.sub)
incidence.h1.sub.dplot[["Group"]] <- "Actual presence"

incidence.h1.sub.pla.dplot <- pred.bi(incidence.h1.sub.pla)
incidence.h1.sub.pla.dplot[["Group"]] <- "Placebo presence"

## Stack actual and placebo estimates into one object for ggplot
plot.incidence.h1.sub <- rbind(incidence.h1.sub.dplot, incidence.h1.sub.pla.dplot)

## Optional caching of the fitted objects as Rdata files (disabled)
# save(incidence.h1.sub, file = "incidence.h1.sub.Rdata")
# save(incidence.h1.sub.pla, file = "incidence.h1.sub.pla.Rdata")
# save(plot.incidence.h1.sub, file = "plot.incidence.h1.sub.Rdata")

@

<<MainAttackModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# H1 -- number of violent events (`attack`), fitted with lm_robust().
# Terms currently switched off in both formulas: excluded_mean, log(bdist3).
# No year restriction is applied to the data here.

## Model with actual refugee presence (rtb), subset data
attack.h1.sub <- lm_robust(
  attack ~ rtb + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h1.sub)

## Model with placebo refugee presence (rtb.placebo), placebo data
attack.h1.sub.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(attack.h1.sub.pla)

## Keep only the tidy() row of the treatment term from each model and tag it
## with the kind of estimate it represents
attack.h1.sub.dplot <- subset(tidy(attack.h1.sub), term == "rtb")
attack.h1.sub.dplot[["Group"]] <- "Actual presence"

attack.h1.sub.pla.dplot <- subset(tidy(attack.h1.sub.pla), term == "rtb.placebo")
attack.h1.sub.pla.dplot[["Group"]] <- "Placebo presence"

## Stack actual and placebo estimates into one object for ggplot
plot.attack.h1.sub <- rbind(attack.h1.sub.dplot, attack.h1.sub.pla.dplot)

@

<<MainBattleDeathModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# H1 -- battle deaths (`best`), modelled as log(best + 1) with lm_robust().
# The lagged outcome enters on the same scale, log(best_1 + 1).
# Terms currently switched off in both formulas: excluded_mean, log(bdist3).
# No year restriction is applied to the data here.

## Model with actual refugee presence (rtb), subset data
best.h1.sub <- lm_robust(
  log(best + 1) ~ rtb + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(best.h1.sub)

## Model with placebo refugee presence (rtb.placebo), placebo data
best.h1.sub.pla <- lm_robust(
  log(best + 1) ~ rtb.placebo + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(best.h1.sub.pla)

## Keep only the tidy() row of the treatment term from each model and tag it
## with the kind of estimate it represents
best.h1.sub.dplot <- subset(tidy(best.h1.sub), term == "rtb")
best.h1.sub.dplot[["Group"]] <- "Actual presence"

best.h1.sub.pla.dplot <- subset(tidy(best.h1.sub.pla), term == "rtb.placebo")
best.h1.sub.pla.dplot[["Group"]] <- "Placebo presence"

## Stack actual and placebo estimates into one object for ggplot
plot.best.h1.sub <- rbind(best.h1.sub.dplot, best.h1.sub.pla.dplot)

@

<<RegTabMainModelsH1, eval = TRUE, echo = FALSE, tidy=TRUE, fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

# setwd("Paper_Inputs")
# 
# # Bring in RData
# load("onset.h1.sub.Rdata") 
# load("onset.h1.sub.pla.Rdata")
# load("incidence.h1.sub.Rdata")
# load("incidence.h1.sub.pla.Rdata")

# H1 regression table, emitted as raw LaTeX (the chunk header sets results='asis').
print(texreg(
  # Columns: each outcome's treatment model followed by its placebo (p) model
  list(onset.h1.sub,     onset.h1.sub.pla,
       incidence.h1.sub, incidence.h1.sub.pla,
       attack.h1.sub,    attack.h1.sub.pla,
       best.h1.sub,      best.h1.sub.pla),
  custom.model.names = c("Onset",     "Onset (p)",
                         "Incidence", "Incidence (p)",
                         "Events",    "Events (p)",
                         "Deaths",    "Deaths (p)"),

  # Rows: drop the Country/year factor terms, then relabel what is left.
  # The numbers in the trailing comments are the positions that reorder.coef
  # refers to below.
  omit.coef = 'Country|year',
  custom.coef.names = c(
    "Intercept",                          #  1
    "Refugee Presence",                   #  2
    "Refugee Presence in Other Provs",    #  3
    "Lagged Onset",                       #  4
    "Sum Events in Neighbor Provs",       #  5
    "Lagged Population (logged)",         #  6
    "Lagged GDPpc",                       #  7
    "Terrain Ruggedness",                 #  8
    "Province Size (sqkm)",               #  9
    "Distance from border (km logged)",   # 10
    "Distance from capital (km logged)",  # 11
    "IDP Presence",                       # 12
    "Placebo Refugee Presence",           # 13
    "Lagged Incidence",                   # 14
    "Lagged Events",                      # 15
    "Lagged Deaths"                       # 16
  ),
  # Display order: presence terms, lagged outcomes, controls, intercept last
  reorder.coef = c(2, 13, 3,
                   4, 14, 15, 16,
                   5, 6, 7, 8, 9, 10, 11, 12,
                   1),

  # Which uncertainty and fit statistics to report
  include.ci = FALSE,
  include.aic = TRUE,
  include.bic = FALSE,
  include.loglik = TRUE,
  include.deviance = FALSE,
  include.rmse = FALSE,

  # Float, caption and cross-reference label
  caption = "Regression table for H1: Effects of Refugee Presence on Conflict Outcomes and their respective Placebo models (p). Note that for outcomes Onset and Incidence, the data is from 1990--2008, while Violent Events and Battle Deaths (logged) is 1990--2018. All models include control variables and Country and Year fixed effects.",
  label = "tab:MainModelsH1",
  scalebox = '0.8',
  use.packages = FALSE,
  float.pos = "H"
))

@

The effects for the lagged dependent variables confirm that new conflict (onset) is less likely if there was an onset of conflict the previous year, while the likelihood of continued conflict (incidence), number of violent events, and battle deaths are greater if the province experienced greater conflict in the previous year. All conflict outcomes are positively affected by greater neighboring violent events (spatial conflict diffusion). Higher population is also associated with greater conflict risk. Moving farther away from the international border and from the capital is weakly associated with lower risk of conflict. These findings are partially consistent with previous work in the area. In particular, \citet{Buhaug:2006} find that conflict is more likely near international borders and further away from capitals. The presence of IDPs does not seem to alter conflict outcomes, but additional research examining the direct effects of IDPs is necessary. Lastly, GDP from the previous year is negatively associated with conflict onset and incidence, which speaks to how increased development might mitigate the risk of conflict. We are cautious about interpreting the coefficients on our control variables, however, for methodological reasons outlined in \citet{cinelli2020making} and \citet{Keele:2019}.\footnote{Thus, our argument and findings that areas hosting refugees are no more at risk of conflict, and in some cases, are less likely to experience conflict than non-hosting areas, appear to be more or less at odds with the findings represented by \cite{Salehyan:2006}. However, as we detail in SI Section~\ref{SIsec:SGextension}, when we extend \cite{Salehyan:2006}'s study to include more recent decades, their results no longer hold. It is likely, as we describe, that some factor or set of factors has changed such that refugee settlement in recent decades is no longer positively associated with increased civil conflict.}

% Population regression results 
To further confirm this null result, we turn to the subset of our data for which we have information on refugee population numbers, which covers Africa from 2010--2015 (as described in Section~\ref{sec:popdata}). We replace our main independent variable of refugee presence (binary) with population, and we also observe no effect of greater numbers of refugees on conflict. The regression table in SI Section~\ref{SIsec:regpopulation} shows no effects of logged refugee population on violent events or battle deaths. Nonetheless, the absolute number of refugees may matter less than the \emph{relative} number of refugees compared to the local population; a larger ratio may tip the scales demographically and incite conflict. When we replace logged population with the ratio of refugee population to local population, the effects on conflict are negative but not statistically significant. In Section~\ref{sec:heteffects}, testing H4, we explore the heterogeneous effects of refugee population size.

% PRED PROBS FIGURE FOR H1
<<MainModelsH1_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.width = 9, fig.height = 3, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence on conflict outcomes (black) -- onset and incidence (1990--2008), violent events and battle deaths (1990--2018) -- compared to their respective placebo estimates, i.e. effect of future refugee presence (gray). All point estimates include 95$\\%$ CIs.")>>=
 
# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h1.sub.Rdata")
# load("plot.incidence.h1.sub.Rdata")

# Tag the logit-based estimates with their outcome. No `outcome` column is
# assigned here for plot.attack.h1.sub / plot.best.h1.sub; they are plotted
# using the `outcome` column they already carry.
plot.onset.h1.sub$outcome <- "onset"
plot.incidence.h1.sub$outcome <- "incidence"

# Dodge width of 0.5 so the actual and placebo estimates sit side by side
pd <- position_dodge(0.5)

# Components shared by all three H1 panels: zero reference line, empty x-axis
# title, black/gray colour coding, and the panel frame.
# guides(shape = "none") replaces the deprecated guides(shape = FALSE).
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines and
# rect borders; `size` is kept here so older installations still work.
h1.panel.style <- function() {
  list(geom_hline(aes(yintercept = 0)),
       xlab(""),
       scale_colour_manual(values = c("black", "gray60")),
       guides(shape = "none"),
       theme(panel.background = element_blank(),
             panel.border = element_rect(colour = "gray", fill = NA, size = .8)))
}

# One legend-free panel for an OLS outcome.
#   estimates: data frame with outcome, estimate, conf.low, conf.high, Group
#   y.range:   c(lower, upper) limits for the y-axis
#   y.title:   y-axis title
#   x.label:   tick label shown in place of the raw outcome name
h1.ols.panel <- function(estimates, y.range, y.title, x.label) {
  ggplot(estimates, aes(x = outcome, y = estimate,
                        group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                      ymax = as.numeric(as.character(conf.high))),
                  width = 0, size = .7, position = pd) +
    h1.panel.style() +
    ylim(y.range[1], y.range[2]) +
    ylab(y.title) +
    scale_x_discrete(labels = x.label) +
    theme(legend.position = "none")
}

# Onset and incidence: rows with Plot == 3 of each frame, onset shown first.
# This panel carries the legend for the whole figure.
PLOT.onsetincidence.h1.sub <- bind_rows(plot.onset.h1.sub[plot.onset.h1.sub$Plot == 3, ],
                                        plot.incidence.h1.sub[plot.incidence.h1.sub$Plot == 3, ]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>%
  ggplot(aes(x = outcome, y = Means,
             group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  h1.panel.style() +
  ylab("Change in Predicted Probability") +
  scale_x_discrete(labels = c("Onset",
                              "Incidence")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.05, .05)) +
  theme(legend.title = element_blank(),
        legend.justification = c(1, 1),
        legend.position = c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical",
        legend.key = element_rect(colour = "transparent", fill = "white"))

PLOT.attack.h1.sub <- h1.ols.panel(plot.attack.h1.sub,
                                   y.range = c(-1.5, 1.5),
                                   y.title = "Change in Predicted Number",
                                   x.label = c("Violent Events"))

PLOT.best.h1.sub <- h1.ols.panel(plot.best.h1.sub,
                                 y.range = c(-.25, .25),
                                 y.title = "Change in Predicted Number (logged)",
                                 x.label = c("Battle Deaths"))


# Compose the figure: titled probability panel next to the two OLS panels
(PLOT.onsetincidence.h1.sub + ggtitle('Effect of Refugee Presence on Conflict Outcomes') +
  theme(plot.title = element_text(hjust=.5))) +
  (PLOT.attack.h1.sub + PLOT.best.h1.sub)

@

Turning back to the results for refugee presence, Figure~\ref{fig:MainModelsH1_sub} plots the effect sizes for Table~\ref{tab:MainModelsH1}. We calculate the change in predicted values of the conflict outcomes given refugee presence (black) and placebo future presence (gray), keeping the control variables at their observed values, and report 95\% confidence intervals.\footnote{We use quasi-Bayesian Monte Carlo simulation to generate quantities of interest and uncertainty estimates for the models estimated with logistic regression, and we analytically calculate the quantities of interest and uncertainty estimates for the models estimated with OLS regression.}
The baseline probability of conflict onset (i.e., no refugee presence) is 
\Sexpr{plot.onset.h1.sub[1,1]*100}\% (95\% CI $=[\Sexpr{plot.onset.h1.sub[1,4]*100}\%, \Sexpr{plot.onset.h1.sub[1,3]*100}\%]$). The effect of hosting refugees is 
\Sexpr{plot.onset.h1.sub[3,1]*100} percentage points (95\% CI $=[\Sexpr{plot.onset.h1.sub[3,4]*100}, \Sexpr{plot.onset.h1.sub[3,3]*100}]$), a statistically and substantively significant effect. The other three conflict outcomes are null, and our placebo tests perform well, confirming that there is no effect of future refugee presence, which would otherwise point to possible selection bias. 
Using panel matching analysis in SI Section~\ref{SIsec:panelmatch}, we confirm the overall null effect of refugee presence on conflict outcomes. 


\subsection*{H2: Effect of Concentrated or Dispersed Refugee Presence on Conflict}

% MODELS FOR H2 
<<MainOnsetModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model, with subset data
## Logistic regression (bayesglm, binomial family, logit link) of onset.n on
## rtb, rtb.other and rtb_rtb.other, plus the lagged outcome onset.n_1, the
## controls listed below, and Country and year factors.
## NOTE(review): rtb_rtb.other is presumably the precomputed rtb x rtb.other
## interaction -- confirm where it is constructed.
## Estimated on the rows of geddatasub with year < 2009.
## excluded_mean, log(bdist3) and the explicit prior settings are commented out.
onset.h2.sub <- bayesglm(onset.n ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.sub)

## Placebo model
## Same specification with rtb.placebo and rtb.placebo_rtb.other in place of
## rtb and rtb_rtb.other, estimated on the rows of plagedrtbsub with year < 2009.
onset.h2.sub.pla <- bayesglm(onset.n ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.sub.pla)

# Generate predicted probs
# pred.bi.int() is defined outside this chunk; the figure chunk later reads the
# Plot, Means, CIL and CIU columns of its output.
# NOTE(review): if pred.bi.int() draws random numbers, the order of the two
# calls below affects results under a fixed seed -- confirm before reordering.
onset.h2.sub.dplot <- pred.bi.int(onset.h2.sub) #generate predicted probs
onset.h2.sub.dplot$Group <- "Actual presence" #label the type of estimate

onset.h2.sub.pla.dplot <- pred.bi.int(onset.h2.sub.pla)
onset.h2.sub.pla.dplot$Group <- "Placebo presence"

plot.onset.h2.sub <- rbind(onset.h2.sub.dplot, #combine for ggplot
                           onset.h2.sub.pla.dplot)

# save output as Rdata files
# save(onset.h2.sub, file = "onset.h2.sub.Rdata")
# save(onset.h2.sub.pla, file = "onset.h2.sub.pla.Rdata")
# save(plot.onset.h2.sub, file = "plot.onset.h2.sub.Rdata")

@

<<MainIncidenceModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model, with subset data
## Logistic regression (bayesglm, binomial family, logit link) of incidence on
## rtb, rtb.other and rtb_rtb.other, plus the lagged outcome incidence_1, the
## controls listed below, and Country and year factors.
## NOTE(review): rtb_rtb.other is presumably the precomputed rtb x rtb.other
## interaction -- confirm where it is constructed.
## Estimated on the rows of geddatasub with year < 2009.
## excluded_mean, log(bdist3) and the explicit prior settings are commented out.
incidence.h2.sub <- bayesglm(incidence ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub)

## Placebo model
## Same specification with rtb.placebo and rtb.placebo_rtb.other in place of
## rtb and rtb_rtb.other, estimated on the rows of plagedrtbsub with year < 2009.
incidence.h2.sub.pla <- bayesglm(incidence ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub.pla)

# Generate predicted probs
# pred.bi.int() is defined outside this chunk; the figure chunk later reads the
# Plot, Means, CIL and CIU columns of its output.
# NOTE(review): if pred.bi.int() draws random numbers, the order of the two
# calls below affects results under a fixed seed -- confirm before reordering.
incidence.h2.sub.dplot <- pred.bi.int(incidence.h2.sub) #generate predicted probs
incidence.h2.sub.dplot$Group <- "Actual presence" #label the type of estimate

incidence.h2.sub.pla.dplot <- pred.bi.int(incidence.h2.sub.pla)
incidence.h2.sub.pla.dplot$Group <- "Placebo presence"

plot.incidence.h2.sub <- rbind(incidence.h2.sub.dplot, #combine for ggplot
                           incidence.h2.sub.pla.dplot)

# save output as Rdata files
# save(incidence.h2.sub, file = "incidence.h2.sub.Rdata")
# save(incidence.h2.sub.pla, file = "incidence.h2.sub.pla.Rdata")
# save(plot.incidence.h2.sub, file = "plot.incidence.h2.sub.Rdata")

@

<<MainAttackModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 violent-events models (OLS via lm_robust).
## Treatment: attack on rtb, rtb.other and rtb_rtb.other, with lagged attacks,
## the controls below, and Country and year factors; estimated on geddatasub.
## excluded_mean and log(bdist3) stay commented out of the specification.
attack.h2.sub <- lm_robust(
  formula = attack ~ rtb + rtb.other + rtb_rtb.other +
    attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN +
    log_bdist2 +
    # log(bdist3) +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h2.sub)

## Placebo: rtb.placebo and rtb.placebo_rtb.other replace rtb and
## rtb_rtb.other; estimated on plagedrtbsub.
attack.h2.sub.pla <- lm_robust(
  formula = attack ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN +
    log_bdist2 +
    # log(bdist3) +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(attack.h2.sub.pla)


# Effect estimates from pred.lm.int.r() (defined outside this chunk), each
# tagged with the type of estimate it represents.
attack.h2.sub.dplot <- pred.lm.int.r(attack.h2.sub)
attack.h2.sub.dplot$Group <- "Actual presence"

attack.h2.sub.pla.dplot <- pred.lm.int.r(attack.h2.sub.pla)
attack.h2.sub.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second, in one frame for ggplot
plot.attack.h2.sub <- rbind(attack.h2.sub.dplot, attack.h2.sub.pla.dplot)
@

<<MainBattleDeathModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 battle-deaths models (OLS via lm_robust on log(best + 1)).
## Treatment: rtb, rtb.other and rtb_rtb.other, with the lagged logged outcome,
## the controls below, and Country and year factors; estimated on geddatasub.
## excluded_mean and log(bdist3) stay commented out of the specification.
best.h2.sub <- lm_robust(
  formula = log(best+1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_1+1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN +
    log_bdist2 +
    # log(bdist3) +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(best.h2.sub)

## Placebo: rtb.placebo and rtb.placebo_rtb.other replace rtb and
## rtb_rtb.other; estimated on plagedrtbsub.
best.h2.sub.pla <- lm_robust(
  formula = log(best+1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_1+1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN +
    log_bdist2 +
    # log(bdist3) +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(best.h2.sub.pla)


# Effect estimates from pred.lm.int.r() (defined outside this chunk), each
# tagged with the type of estimate it represents.
best.h2.sub.dplot <- pred.lm.int.r(best.h2.sub)
best.h2.sub.dplot$Group <- "Actual presence"

best.h2.sub.pla.dplot <- pred.lm.int.r(best.h2.sub.pla)
best.h2.sub.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second, in one frame for ggplot
plot.best.h2.sub <- rbind(best.h2.sub.dplot, best.h2.sub.pla.dplot)

@

Do the effects on conflict outcomes change depending on how widely dispersed refugee sites are within a country? 
This section explores the interaction between the main independent variable, \texttt{Refugee Presence} in a given province-year, and \texttt{Refugee Presence in Other Provinces}, which indicates whether there are refugee sites in any other province in the same country-year. Substantively, this secondary analysis examines the difference between having refugee sites geographically concentrated or dispersed within a country. We argue that this difference has implications for resource distribution and development, which we explore further in the following section.

% PRED PROBS FIGURE FOR H2
<<MainModelsH2_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.width = 9, fig.height = 5.5, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence on conflict outcomes (black) -- onset and incidence (1990--2008), violent events and battle deaths (1990--2018) -- compared to their respective placebo estimates, i.e. effect of future refugee presence (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h2.sub.Rdata")
# load("plot.incidence.h2.sub.Rdata")
# load("plot.onset.h2.full.Rdata")
# load("plot.incidence.h2.full.Rdata")

# Tag every estimate frame with the outcome it belongs to
plot.onset.h2.sub$outcome <- "onset"
plot.incidence.h2.sub$outcome <- "incidence"
plot.attack.h2.sub$outcome <- "attack"
plot.best.h2.sub$outcome <- "log(best + 1)"

# Dodge width of 0.5 so the actual and placebo estimates sit side by side
pd <- position_dodge(0.5)

# Build one H2 panel: dodged point estimates with CI bars around a zero line.
#   estimates:   data frame with outcome, Means, CIL, CIU and Group columns
#   y.scale:     y-axis scale (or ylim()) object to add to the plot
#   y.title:     y-axis title
#   x.labels:    tick labels shown in place of the raw outcome names
#   show.legend: TRUE draws the legend inside the top-right corner of the
#                panel; FALSE (default) suppresses it
# guides(shape = "none") replaces the deprecated guides(shape = FALSE).
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines and
# rect borders; `size` is kept here so older installations still work.
h2.effect.panel <- function(estimates, y.scale, y.title, x.labels,
                            show.legend = FALSE) {
  legend.style <- if (show.legend) {
    theme(legend.title = element_blank(),
          legend.justification = c(1, 1),
          legend.position = c(.99, .99),
          legend.box = "horizontal",
          legend.direction = "vertical",
          legend.key = element_rect(colour = "transparent", fill = "white"))
  } else {
    theme(legend.position = "none")
  }

  ggplot(estimates, aes(x = outcome, y = Means,
                        group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    y.scale +
    ylab(y.title) +
    xlab("") +
    scale_x_discrete(labels = x.labels) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") +
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)) +
    legend.style
}

# Onset and incidence rows sharing a given Plot id, with onset ordered first
h2.logit.rows <- function(plot.id) {
  bind_rows(plot.onset.h2.sub[plot.onset.h2.sub$Plot == plot.id, ],
            plot.incidence.h2.sub[plot.incidence.h2.sub$Plot == plot.id, ]) %>%
    mutate(outcome = fct_relevel(outcome, "onset"))
}

# Percent-formatted probability axis used by both onset/incidence panels
h2.prob.scale <- function() {
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12))
}

# "a" panels use Plot == 3 (logit frames) and rows 1 and 3 (OLS frames) and
# form the "Concentrated" row of the figure; "b" panels use Plot == 6 and
# rows 2 and 4 and form the "Dispersed" row.
# NOTE(review): the c(1,3)/c(2,4) indexing assumes each pred.lm.int.r() result
# has exactly two rows, stacked actual then placebo -- confirm.
PLOT.onsetincidence.h2a.sub <- h2.effect.panel(h2.logit.rows(3), h2.prob.scale(),
                                               "Change in Predicted Probability",
                                               c("Onset", "Incidence"),
                                               show.legend = TRUE)

PLOT.onsetincidence.h2b.sub <- h2.effect.panel(h2.logit.rows(6), h2.prob.scale(),
                                               "Change in Predicted Probability",
                                               c("Onset", "Incidence"))

PLOT.attack.h2a.sub <- h2.effect.panel(plot.attack.h2.sub[c(1,3),], ylim(-3,3),
                                       "Change in Predicted Number",
                                       c("Violent Events"))

PLOT.attack.h2b.sub <- h2.effect.panel(plot.attack.h2.sub[c(2,4),], ylim(-3,3),
                                       "Change in Predicted Number",
                                       c("Violent Events"))

PLOT.best.h2a.sub <- h2.effect.panel(plot.best.h2.sub[c(1,3),], ylim(-.5,.5),
                                     "Change in Predicted Number (logged)",
                                     c("Battle Deaths"))

PLOT.best.h2b.sub <- h2.effect.panel(plot.best.h2.sub[c(2,4),], ylim(-.5,.5),
                                     "Change in Predicted Number (logged)",
                                     c("Battle Deaths"))


# Top row: concentrated presence; bottom row: dispersed presence
patch1 <- (PLOT.onsetincidence.h2a.sub + ggtitle("Effect of Concentrated Refugee Presence") + 
  theme(plot.title = element_text(hjust= 0))) +
  (PLOT.attack.h2a.sub + PLOT.best.h2a.sub) 

patch2 <- (PLOT.onsetincidence.h2b.sub + ggtitle("Effect of Dispersed Refugee Presence") + 
  theme(plot.title = element_text(hjust= 0))) +
  (PLOT.attack.h2b.sub + PLOT.best.h2b.sub) 

patch1/patch2


@

Table~\ref{tab:MainModelsH2} in the SI confirms our expectations for H2. First, the marginal effect of refugee presence (i.e., cases in which a province hosting refugees is the only one in that country-year) is negative and statistically significant across all four conflict outcomes. This suggests that for geographically concentrated cases of refugee presence -- that is, conditional on being the only refugee-hosting province in a given country-year -- there is a \emph{conditional risk reduction effect}.\footnote{The marginal effect of the presence of refugee sites in another province is still negative and statistically significant. This could capture potential positive spillover effects of refugee sites on surrounding provinces. Another possible, non-exclusive explanation is that these coefficients reflect selection at the country level, as refugees go to less conflict-affected countries if given the option. We do not examine possible diffusion effects or country-level selection in this paper.} The interaction term is positive and statistically significant, meaning that when refugee sites are also dispersed across other provinces, this risk reduction is effectively canceled out. There are no effects for the placebo tests, and the other control variables retain the same substantive interpretations as in H1, holding all else equal. 

Can we quantify this \emph{conditional risk reduction effect}? The baseline (no refugee sites) predicted probabilities of conflict onset and incidence are \Sexpr{plot.onset.h2.sub[1,1]*100}\% and \Sexpr{plot.incidence.h2.sub[1,1]*100}\%, respectively. From Figure~\ref{fig:MainModelsH2_sub}, the effect of refugee presence conditional on no presence elsewhere in the country is \Sexpr{plot.onset.h2.sub[3,1]*100} percentage points (95\% CI $=[\Sexpr{plot.onset.h2.sub[3,4]*100}, \Sexpr{plot.onset.h2.sub[3,3]*100}]$) for onset and \Sexpr{plot.incidence.h2.sub[3,1]*100} percentage points (95\% CI $=[\Sexpr{plot.incidence.h2.sub[3,4]*100}, \Sexpr{plot.incidence.h2.sub[3,3]*100}]$) for incidence. These are substantively large negative effects; given the baseline probabilities, the likelihood of conflict onset is more than halved in cases of geographically concentrated refugee presence. Furthermore, violent attacks are reduced by \Sexpr{plot.attack.h2.sub[1,1]} (95\% CI $=[\Sexpr{plot.attack.h2.sub[1,3]}, \Sexpr{plot.attack.h2.sub[1,4]}]$) and logged battle deaths by \Sexpr{plot.best.h2.sub[1,1]} (95\% CI $=[\Sexpr{plot.best.h2.sub[1,3]}, \Sexpr{plot.best.h2.sub[1,4]}]$).

If refugee sites are dispersed throughout the country, Figure \ref{fig:MainModelsH2_sub} shows there is still no effect on conflict. Finally, the null effects from the placebo tests (gray) suggest that the \emph{conditional risk reduction effect} is not attributable to unobservable confounders, because we do not observe any effects on provinces with future refugee presence, which we assume embody similar characteristics to where refugees tend to settle.


\subsection*{H3: \emph{Conditional Risk Reduction} through Development}
\label{sec:mechanisms}

What explains this \emph{conditional risk reduction effect}? We theorize that if refugee sites are geographically concentrated, the state and/or other humanitarian actors can focus their resources and attention on that particular area, as opposed to spreading their efforts across multiple refugee-hosting areas. From our expert interviews: ``When there is a reasonable number of refugees, it attracts resources of international community. There is a net economic gain in that area\dots{} The physical presence of a large number of internationals might have some sort of calming effect, that might change the social and political dynamics.''\footnote{Senior Official of Shelter \& Settlements Section, interview conducted on July 3, 2018.} To test this proposed mechanism, we examine the effect of concentrated versus dispersed refugee presence on nighttime lights (1992--2012) as an indicator of development. 

<<NightLightsCModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Night-lights models without the interaction term.
## The candidate controls excluded_mean and log(bdist3) are not part of
## either specification.

## Treatment model (rtb), fitted on the subset data
nlights_calib_mean.h1.sub <- lm_robust(
  nlights_calib_mean ~ rtb + rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(nlights_calib_mean.h1.sub)

## Placebo model (rtb.placebo), fitted on the placebo subset data
nlights_calib_mean.h1.sub.pla <- lm_robust(
  nlights_calib_mean ~ rtb.placebo + rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(nlights_calib_mean.h1.sub.pla)


# Pull the coefficient row of interest from each model. These are regression
# estimates (with the interval columns tidy() reports), not predicted
# probabilities. Rows are selected on the tidied data frame's own `term`
# column, so the filter does not depend on the model object's internal `$term`
# vector lining up with the rows returned by tidy().
nlights_calib_mean.h1.sub.dplot <- subset(tidy(nlights_calib_mean.h1.sub),
                                          term == "rtb")
nlights_calib_mean.h1.sub.dplot$Group <- "Actual presence" #label the type of estimate

nlights_calib_mean.h1.sub.pla.dplot <- subset(tidy(nlights_calib_mean.h1.sub.pla),
                                              term == "rtb.placebo")
nlights_calib_mean.h1.sub.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo estimates for ggplot
plot.nlights_calib_mean.h1.sub <- rbind(nlights_calib_mean.h1.sub.dplot,
                                        nlights_calib_mean.h1.sub.pla.dplot)

@

<<NightLightsCModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Night-lights models with the presence x presence-elsewhere interaction.
## The candidate controls excluded_mean and log(bdist3) are not part of
## either specification.

## Treatment model (rtb, rtb.other and their product term), subset data
nlights_calib_mean.h2.sub <- lm_robust(
  nlights_calib_mean ~ rtb + rtb.other + rtb_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(nlights_calib_mean.h2.sub)

## Placebo model (rtb.placebo in place of rtb), placebo subset data
nlights_calib_mean.h2.sub.pla <- lm_robust(
  nlights_calib_mean ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(nlights_calib_mean.h2.sub.pla)

# Turn each fit into plotting rows with pred.lm.int.r() (helper defined
# elsewhere in this file); the calls stay in their original order.
nlights_calib_mean.h2.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub)
nlights_calib_mean.h2.sub.pla.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub.pla)

# Tag each set of rows with the estimate type used for colouring
nlights_calib_mean.h2.sub.dplot$Group <- "Actual presence"
nlights_calib_mean.h2.sub.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo rows for ggplot
plot.nlights_calib_mean.h2.sub <- rbind(
  nlights_calib_mean.h2.sub.dplot,
  nlights_calib_mean.h2.sub.pla.dplot
)

@

% PRED PROBS FIGURE FOR H3
<<MainModelsH3, eval=TRUE, echo = FALSE, tidy=TRUE, fig.width = 8, fig.height = 3, out.width= ".85\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence (black) on average nighttime lights (1992--2012), compared to their respective placebo estimates, i.e. effect of future refugee presence (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=
 

# Give both plotting frames a common x-axis category
plot.nlights_calib_mean.h1.sub$outcome <- "nighttime lights"
plot.nlights_calib_mean.h2.sub$outcome <- "nighttime lights"

# plots
pd <- position_dodge(0.5) # dodge width of 0.5 so the actual/placebo points do not overlap

# H1 panel: coefficient estimates with conf.low/conf.high intervals.
# NOTE(review): this object is not used in the figure assembled at the end of
# this chunk (the patch1 lines are commented out).
PLOT.nightlights.h1.sub <- ggplot(plot.nlights_calib_mean.h1.sub,
                                  aes(x = outcome, y = estimate,
                                      group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                    ymax = as.numeric(as.character(conf.high))),
                width = 0, size = .7, position = pd) +
  # Constant zero reference line: set as a parameter rather than mapped through
  # aes(), so a single line is drawn instead of one per data row
  geom_hline(yintercept = 0) +
  ylim(-.25, .25) +
  ylab("Change in Predicted Lights") +
  xlab("") +
  scale_x_discrete(labels = c("Average Nighttime Lights")) +
  scale_colour_manual(values = c("black", "gray60")) +
  # "none" replaces the deprecated guides(<aes> = FALSE) form
  guides(shape = "none") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Night-lights panel from rows 1 and 3 of plot.nlights_calib_mean.h2.sub
# (the first row of the actual-presence block and, presumably, the first row of
# the placebo block -- confirm the row layout returned by pred.lm.int.r).
# This is the only panel that keeps its legend, placed in the top-right corner.
PLOT.nightlights.h2a.sub <- ggplot(plot.nlights_calib_mean.h2.sub[c(1,3),],
                                   aes(x = outcome, y = Means,
                                       group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # CIL/CIU are coerced via character in case they are stored as factors/strings
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  # Constant zero reference line: set as a parameter rather than mapped through
  # aes(), so a single line is drawn instead of one per data row
  geom_hline(yintercept = 0) +
  ylim(-.25, .25) +
  ylab("Change in Predicted Lights") +
  xlab("") +
  scale_x_discrete(labels = c("Average Nighttime Lights")) +
  scale_colour_manual(values = c("black", "gray60")) +
  # "none" replaces the deprecated guides(<aes> = FALSE) form
  guides(shape = "none") +
  theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill = NA, size = .8),
        legend.title = element_blank(),
        legend.justification = c(1, 1),
        legend.position = c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical",
        legend.key = element_rect(colour = "transparent", fill = "white")
        )

# Night-lights panel from rows 2 and 4 of plot.nlights_calib_mean.h2.sub
# (the second row of the actual-presence block and, presumably, the second row
# of the placebo block -- confirm the row layout returned by pred.lm.int.r).
PLOT.nightlights.h2b.sub <- ggplot(plot.nlights_calib_mean.h2.sub[c(2,4),],
                                   aes(x = outcome, y = Means,
                                       group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # CIL/CIU are coerced via character in case they are stored as factors/strings
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  # Constant zero reference line: set as a parameter rather than mapped through
  # aes(), so a single line is drawn instead of one per data row
  geom_hline(yintercept = 0) +
  ylim(-.25, .25) +
  ylab("Change in Predicted Lights") +
  xlab("") +
  scale_x_discrete(labels = c("Average Nighttime Lights")) +
  scale_colour_manual(values = c("black", "gray60")) +
  # "none" replaces the deprecated guides(<aes> = FALSE) form
  guides(shape = "none") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# The H1 panel is currently left out of the figure:
# patch1 <- (PLOT.nightlights.h1.sub + ggtitle("Effect of Refugee Presence")) 
#            + theme(plot.title = element_text(hjust= 1.4))) 

# Add left-aligned, size-11 titles to the two H2 panels
patch2 <- PLOT.nightlights.h2a.sub +
  ggtitle("Effect of Concentrated Refugee Presence") +
  theme(plot.title = element_text(hjust = 0, size = 11))

patch3 <- PLOT.nightlights.h2b.sub +
  ggtitle("Effect of Dispersed Refugee Presence") +
  theme(plot.title = element_text(hjust = 0, size = 11))

# Combine both panels; this final expression is what the chunk prints
patch2 + patch3

@

Table \ref{tab:MainModelsH3} in the SI shows a small but positive effect of refugee presence in the geographically concentrated case, confirming our expectations. From Figure \ref{fig:MainModelsH3}, the effect size is \Sexpr{plot.nlights_calib_mean.h2.sub[1,1]} (95\% CI $=[\Sexpr{plot.nlights_calib_mean.h2.sub[1,3]}, \Sexpr{plot.nlights_calib_mean.h2.sub[1,4]}]$). To give a sense of scale, for Kenya in 2012, the average nighttime lights measure in Nairobi (pop. 44.3 million) was 22.7, while for the least populated North-Eastern province (pop. 3.2 million), this measure was 6.1. As predicted, if refugee sites are dispersed throughout the country, there are no effects on development. Again, the placebo tests (gray) show no effects of future presence. 

\subsection*{Heterogeneous Effects by Duration, Relative Size, Formality, Border Proximity, and Region} 
\label{sec:heteffects}

In H4, we posit additional conditions under which we would expect greater gains in local development and stability for geographically concentrated refugee-hosting areas. We conduct the following heterogeneous effects analysis: we compare provinces with new versus older refugee sites (duration), large versus small number of refugees relative to the local population (relative size), refugee camps versus settlements (formality), and provinces near versus far from the border. Lastly, we examine subgroup effects by region. Due to space constraints, we only show the results for duration here with the remaining results presented in SI Section \ref{SIsec:hte}.

% NEW VS. OLD 
First, among hosting provinces, we compare whether there is a new refugee site created within the past year versus not (established refugee presence).\footnote{The placebo test for this analysis is future refugee presence, plotted from the previous analyses, since there is no placebo version for new site or established site.} The plots in Figure~\ref{fig:Het_NewSiteH2H3} confirm that there is no effect of new refugee presence on any of the outcomes. However, when refugee communities have time to become established and they are geographically concentrated, we observe in the second set of plots negative effects on conflict outcomes and a positive, but not statistically significant, effect on development.\footnote{We also repeat this analysis defining new presence with a two-year cutoff; the results do not substantively change.} After experiencing substantial demographic shifts of incoming refugees, it takes time for an area to reach a new equilibrium and to see gains in local development. This finding is in line with our expectations, based on what UNHCR experts reported in Section~\ref{sec:hypotheses}.

<<HetOnset_NewSiteH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Conflict-onset models for the new-site vs. no-new-site comparison.
## Every model is a logit bayesglm restricted to years before 2009, and none
## includes the candidate controls excluded_mean or log(bdist3).
## NOTE(review): the _rt_1 / _rt_2 suffixes presumably denote the one- and
## two-year definitions of a "new" site -- confirm where these are constructed.

## rt_1 definition: new site
onset.h2.het.newsite1.sub <- bayesglm(
  onset.n ~ new_site_rt_1 + rtb.other + new_site_rt_1_rtb.other +
    onset.n_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(onset.h2.het.newsite1.sub)

## rt_1 definition: no new site
onset.h2.het.nonewsite1.sub <- bayesglm(
  onset.n ~ no_new_site_rt_1 + rtb.other + no_new_site_rt_1_rtb.other +
    onset.n_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(onset.h2.het.nonewsite1.sub)

## rt_2 definition: new site
onset.h2.het.newsite2.sub <- bayesglm(
  onset.n ~ new_site_rt_2 + rtb.other + new_site_rt_2_rtb.other +
    onset.n_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(onset.h2.het.newsite2.sub)

## rt_2 definition: no new site
onset.h2.het.nonewsite2.sub <- bayesglm(
  onset.n ~ no_new_site_rt_2 + rtb.other + no_new_site_rt_2_rtb.other +
    onset.n_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(onset.h2.het.nonewsite2.sub)

## Placebo model on the placebo data; prior.scale / prior.df are not set
## (an explicit `prior.scale = 1, prior.df = 3` was commented out)
onset.h2.sub.pla <- bayesglm(
  onset.n ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    onset.n_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub[plagedrtbsub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(onset.h2.sub.pla)

# Run every onset fit through pred.bi.int() (helper defined elsewhere in this
# file). The calls are kept in their original order in case the helper draws
# random numbers.
onset.h2.het.newsite1.sub.dplot   <- pred.bi.int(onset.h2.het.newsite1.sub)
onset.h2.het.nonewsite1.sub.dplot <- pred.bi.int(onset.h2.het.nonewsite1.sub)
onset.h2.het.newsite2.sub.dplot   <- pred.bi.int(onset.h2.het.newsite2.sub)
onset.h2.het.nonewsite2.sub.dplot <- pred.bi.int(onset.h2.het.nonewsite2.sub)
onset.h2.sub.pla.dplot            <- pred.bi.int(onset.h2.sub.pla)

# Label the type of estimate (used as the grouping/colour variable when plotting)
onset.h2.het.newsite1.sub.dplot$Group   <- "Actual presence"
onset.h2.het.nonewsite1.sub.dplot$Group <- "Actual presence"
onset.h2.het.newsite2.sub.dplot$Group   <- "Actual presence"
onset.h2.het.nonewsite2.sub.dplot$Group <- "Actual presence"
onset.h2.sub.pla.dplot$Group            <- "Placebo presence"

# Pair each treatment result with the same placebo rows for ggplot
plot.onset.h2.het.newsite1.sub <- rbind(
  onset.h2.het.newsite1.sub.dplot, onset.h2.sub.pla.dplot
)
plot.onset.h2.het.nonewsite1.sub <- rbind(
  onset.h2.het.nonewsite1.sub.dplot, onset.h2.sub.pla.dplot
)
plot.onset.h2.het.newsite2.sub <- rbind(
  onset.h2.het.newsite2.sub.dplot, onset.h2.sub.pla.dplot
)
plot.onset.h2.het.nonewsite2.sub <- rbind(
  onset.h2.het.nonewsite2.sub.dplot, onset.h2.sub.pla.dplot
)

# Optional caching of the combined frames (disabled):
# save(plot.onset.h2.het.newsite1.sub, file = "plot.onset.h2.het.newsite1.sub.Rdata")
# save(plot.onset.h2.het.nonewsite1.sub, file = "plot.onset.h2.het.nonewsite1.sub.Rdata")
# save(plot.onset.h2.het.newsite2.sub, file = "plot.onset.h2.het.newsite2.sub.Rdata")
# save(plot.onset.h2.het.nonewsite2.sub, file = "plot.onset.h2.het.nonewsite2.sub.Rdata")

@

<<HetIncidence_NewSiteH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Conflict-incidence models for the new-site vs. no-new-site comparison.
## Every model is a logit bayesglm restricted to years before 2009, and none
## includes the candidate controls excluded_mean or log(bdist3).
## NOTE(review): the _rt_1 / _rt_2 suffixes presumably denote the one- and
## two-year definitions of a "new" site -- confirm where these are constructed.

## rt_1 definition: new site
incidence.h2.het.newsite1.sub <- bayesglm(
  incidence ~ new_site_rt_1 + rtb.other + new_site_rt_1_rtb.other +
    incidence_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(incidence.h2.het.newsite1.sub)

## rt_1 definition: no new site
incidence.h2.het.nonewsite1.sub <- bayesglm(
  incidence ~ no_new_site_rt_1 + rtb.other + no_new_site_rt_1_rtb.other +
    incidence_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(incidence.h2.het.nonewsite1.sub)

## rt_2 definition: new site
incidence.h2.het.newsite2.sub <- bayesglm(
  incidence ~ new_site_rt_2 + rtb.other + new_site_rt_2_rtb.other +
    incidence_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(incidence.h2.het.newsite2.sub)

## rt_2 definition: no new site
incidence.h2.het.nonewsite2.sub <- bayesglm(
  incidence ~ no_new_site_rt_2 + rtb.other + no_new_site_rt_2_rtb.other +
    incidence_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(incidence.h2.het.nonewsite2.sub)

## Placebo model on the placebo data; prior.scale / prior.df are not set
## (an explicit `prior.scale = 1, prior.df = 3` was commented out)
incidence.h2.sub.pla <- bayesglm(
  incidence ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    incidence_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub[plagedrtbsub$year < 2009,],
  family = binomial(link = "logit")
)

# summary(incidence.h2.sub.pla)

# Run every incidence fit through pred.bi.int() (helper defined elsewhere in
# this file). The calls are kept in their original order in case the helper
# draws random numbers.
incidence.h2.het.newsite1.sub.dplot   <- pred.bi.int(incidence.h2.het.newsite1.sub)
incidence.h2.het.nonewsite1.sub.dplot <- pred.bi.int(incidence.h2.het.nonewsite1.sub)
incidence.h2.het.newsite2.sub.dplot   <- pred.bi.int(incidence.h2.het.newsite2.sub)
incidence.h2.het.nonewsite2.sub.dplot <- pred.bi.int(incidence.h2.het.nonewsite2.sub)
incidence.h2.sub.pla.dplot            <- pred.bi.int(incidence.h2.sub.pla)

# Label the type of estimate (used as the grouping/colour variable when plotting)
incidence.h2.het.newsite1.sub.dplot$Group   <- "Actual presence"
incidence.h2.het.nonewsite1.sub.dplot$Group <- "Actual presence"
incidence.h2.het.newsite2.sub.dplot$Group   <- "Actual presence"
incidence.h2.het.nonewsite2.sub.dplot$Group <- "Actual presence"
incidence.h2.sub.pla.dplot$Group            <- "Placebo presence"

# Pair each treatment result with the same placebo rows for ggplot
plot.incidence.h2.het.newsite1.sub <- rbind(
  incidence.h2.het.newsite1.sub.dplot, incidence.h2.sub.pla.dplot
)
plot.incidence.h2.het.nonewsite1.sub <- rbind(
  incidence.h2.het.nonewsite1.sub.dplot, incidence.h2.sub.pla.dplot
)
plot.incidence.h2.het.newsite2.sub <- rbind(
  incidence.h2.het.newsite2.sub.dplot, incidence.h2.sub.pla.dplot
)
plot.incidence.h2.het.nonewsite2.sub <- rbind(
  incidence.h2.het.nonewsite2.sub.dplot, incidence.h2.sub.pla.dplot
)

# Optional caching of the combined frames (disabled):
# save(plot.incidence.h2.het.newsite1.sub, file = "plot.incidence.h2.het.newsite1.sub.Rdata")
# save(plot.incidence.h2.het.nonewsite1.sub, file = "plot.incidence.h2.het.nonewsite1.sub.Rdata")
# save(plot.incidence.h2.het.newsite2.sub, file = "plot.incidence.h2.het.newsite2.sub.Rdata")
# save(plot.incidence.h2.het.nonewsite2.sub, file = "plot.incidence.h2.het.nonewsite2.sub.Rdata")

@

<<HetAttack_NewSiteH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Attack models for the new-site vs. no-new-site comparison.
## All are lm_robust fits on the full subset data (no year restriction here),
## and none includes the candidate controls excluded_mean or log(bdist3).
## NOTE(review): the _rt_1 / _rt_2 suffixes presumably denote the one- and
## two-year definitions of a "new" site -- confirm where these are constructed.

## rt_1 definition: new site
attack.h2.het.newsite1.sub <- lm_robust(
  attack ~ new_site_rt_1 + rtb.other + new_site_rt_1_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h2.het.newsite1.sub)

## rt_1 definition: no new site
attack.h2.het.nonewsite1.sub <- lm_robust(
  attack ~ no_new_site_rt_1 + rtb.other + no_new_site_rt_1_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h2.het.nonewsite1.sub)

## rt_2 definition: new site
attack.h2.het.newsite2.sub <- lm_robust(
  attack ~ new_site_rt_2 + rtb.other + new_site_rt_2_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h2.het.newsite2.sub)

## rt_2 definition: no new site
attack.h2.het.nonewsite2.sub <- lm_robust(
  attack ~ no_new_site_rt_2 + rtb.other + no_new_site_rt_2_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h2.het.nonewsite2.sub)

## Placebo model on the placebo data
attack.h2.sub.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(attack.h2.sub.pla)

# Run every attack fit through pred.lm.int.r() (helper defined elsewhere in
# this file). The calls are kept in their original order in case the helper
# draws random numbers.
attack.h2.het.newsite1.sub.dplot   <- pred.lm.int.r(attack.h2.het.newsite1.sub)
attack.h2.het.nonewsite1.sub.dplot <- pred.lm.int.r(attack.h2.het.nonewsite1.sub)
attack.h2.het.newsite2.sub.dplot   <- pred.lm.int.r(attack.h2.het.newsite2.sub)
attack.h2.het.nonewsite2.sub.dplot <- pred.lm.int.r(attack.h2.het.nonewsite2.sub)
attack.h2.sub.pla.dplot            <- pred.lm.int.r(attack.h2.sub.pla)

# Label the type of estimate (used as the grouping/colour variable when plotting)
attack.h2.het.newsite1.sub.dplot$Group   <- "Actual presence"
attack.h2.het.nonewsite1.sub.dplot$Group <- "Actual presence"
attack.h2.het.newsite2.sub.dplot$Group   <- "Actual presence"
attack.h2.het.nonewsite2.sub.dplot$Group <- "Actual presence"
attack.h2.sub.pla.dplot$Group            <- "Placebo presence"

# Pair each treatment result with the same placebo rows for ggplot
plot.attack.h2.het.newsite1.sub <- rbind(
  attack.h2.het.newsite1.sub.dplot, attack.h2.sub.pla.dplot
)
plot.attack.h2.het.nonewsite1.sub <- rbind(
  attack.h2.het.nonewsite1.sub.dplot, attack.h2.sub.pla.dplot
)
plot.attack.h2.het.newsite2.sub <- rbind(
  attack.h2.het.newsite2.sub.dplot, attack.h2.sub.pla.dplot
)
plot.attack.h2.het.nonewsite2.sub <- rbind(
  attack.h2.het.nonewsite2.sub.dplot, attack.h2.sub.pla.dplot
)

@

<<HetBattleDeath_NewSiteH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Heterogeneous-effects models for battle deaths, outcome log(best + 1).
## Four treatment models are fit on geddatasub: new_site vs. no_new_site
## presence, each in an rt_1 and an rt_2 variant. One placebo model is fit on
## plagedrtbsub. Every model has the same right-hand side: the presence
## indicator, rtb.other, a precomputed "<indicator>_rtb.other" column
## (presumably their product -- confirm where it is constructed), the lagged
## outcome log(best_1 + 1), controls, and Country and year entered as factors.
## NOTE(review): the term order is left untouched because pred.lm.int.r (defined
## elsewhere) may read coefficients by position -- verify before reordering.

## Treatment model with rt_1, with subset data
best.h2.het.newsite1.sub <- lm_robust(log(best+1) ~ new_site_rt_1 +
                       rtb.other + 
                       new_site_rt_1_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(best.h2.het.newsite1.sub)

## Same specification with the no_new_site_rt_1 indicator
best.h2.het.nonewsite1.sub <- lm_robust(log(best+1) ~ no_new_site_rt_1 +
                       rtb.other + 
                       no_new_site_rt_1_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(best.h2.het.nonewsite1.sub)

## Treatment model with rt_2, with subset data
best.h2.het.newsite2.sub <- lm_robust(log(best+1) ~ new_site_rt_2 +
                       rtb.other + 
                       new_site_rt_2_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(best.h2.het.newsite2.sub)

## Same specification with the no_new_site_rt_2 indicator
best.h2.het.nonewsite2.sub <- lm_robust(log(best+1) ~ no_new_site_rt_2 +
                       rtb.other + 
                       no_new_site_rt_2_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(best.h2.het.nonewsite2.sub)

## Placebo model (rtb.placebo indicator, fit on plagedrtbsub rather than geddatasub)
best.h2.sub.pla <- lm_robust(log(best+1) ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub)

#summary(best.h2.sub.pla)

# Run each model through pred.lm.int.r and tag the estimates with a Group label.
# NOTE(review): these are linear models of a logged count, so "predicted probs"
# in the trailing comments is presumably shorthand for predicted changes.
best.h2.het.newsite1.sub.dplot <- pred.lm.int.r(best.h2.het.newsite1.sub) #generate predicted probs
best.h2.het.newsite1.sub.dplot$Group <- "Actual presence" #label the type of estimate

best.h2.het.nonewsite1.sub.dplot <- pred.lm.int.r(best.h2.het.nonewsite1.sub) #generate predicted probs
best.h2.het.nonewsite1.sub.dplot$Group <- "Actual presence" #label the type of estimate

best.h2.het.newsite2.sub.dplot <- pred.lm.int.r(best.h2.het.newsite2.sub) #generate predicted probs
best.h2.het.newsite2.sub.dplot$Group <- "Actual presence" #label the type of estimate

best.h2.het.nonewsite2.sub.dplot <- pred.lm.int.r(best.h2.het.nonewsite2.sub) #generate predicted probs
best.h2.het.nonewsite2.sub.dplot$Group <- "Actual presence" #label the type of estimate

# Placebo estimates; the same frame is reused in all four rbind() calls below
best.h2.sub.pla.dplot <- pred.lm.int.r(best.h2.sub.pla)
best.h2.sub.pla.dplot$Group <- "Placebo presence"

# Row order matters downstream: actual-presence rows come first, placebo rows last
plot.best.h2.het.newsite1.sub <- rbind(best.h2.het.newsite1.sub.dplot, #combine for ggplot
                                      best.h2.sub.pla.dplot)

plot.best.h2.het.nonewsite1.sub <- rbind(best.h2.het.nonewsite1.sub.dplot, #combine for ggplot
                                      best.h2.sub.pla.dplot)

plot.best.h2.het.newsite2.sub <- rbind(best.h2.het.newsite2.sub.dplot, #combine for ggplot
                                      best.h2.sub.pla.dplot)

plot.best.h2.het.nonewsite2.sub <- rbind(best.h2.het.nonewsite2.sub.dplot, #combine for ggplot
                                      best.h2.sub.pla.dplot)

@

<<HetNightLights_NewSiteH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Heterogeneous-effects models for nighttime lights, outcome nlights_calib_mean.
## Four treatment models are fit on geddatasub: new_site vs. no_new_site
## presence, each in an rt_1 and an rt_2 variant. One placebo model is fit on
## plagedrtbsub. Every model has the same right-hand side: the presence
## indicator, rtb.other, a precomputed "<indicator>_rtb.other" column
## (presumably their product -- confirm where it is constructed), the lagged
## outcome nlights_calib_mean_1, nlights_mean_neighbor, controls, and Country
## and year entered as factors.
## NOTE(review): the term order is left untouched because pred.lm.int.r (defined
## elsewhere) may read coefficients by position -- verify before reordering.

## Treatment model with rt_1, with subset data
nlights_calib_mean.h2.het.newsite1.sub <- lm_robust(nlights_calib_mean ~ new_site_rt_1 +
                       rtb.other + 
                       new_site_rt_1_rtb.other +
                       nlights_calib_mean_1 +
                       nlights_mean_neighbor +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(nlights_calib_mean.h2.het.newsite1.sub)

## Same specification with the no_new_site_rt_1 indicator
nlights_calib_mean.h2.het.nonewsite1.sub <- lm_robust(nlights_calib_mean ~ no_new_site_rt_1 +
                       rtb.other + 
                       no_new_site_rt_1_rtb.other +
                       nlights_calib_mean_1 +
                       nlights_mean_neighbor +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(nlights_calib_mean.h2.het.nonewsite1.sub)

## Treatment model with rt_2, with subset data
nlights_calib_mean.h2.het.newsite2.sub <- lm_robust(nlights_calib_mean ~ new_site_rt_2 +
                       rtb.other + 
                       new_site_rt_2_rtb.other +
                       nlights_calib_mean_1 +
                       nlights_mean_neighbor +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(nlights_calib_mean.h2.het.newsite2.sub)

## Same specification with the no_new_site_rt_2 indicator
nlights_calib_mean.h2.het.nonewsite2.sub <- lm_robust(nlights_calib_mean ~ no_new_site_rt_2 +
                       rtb.other + 
                       no_new_site_rt_2_rtb.other +
                       nlights_calib_mean_1 +
                       nlights_mean_neighbor +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(nlights_calib_mean.h2.het.nonewsite2.sub)

## Placebo model (rtb.placebo indicator, fit on plagedrtbsub rather than geddatasub)
nlights_calib_mean.h2.sub.pla <- lm_robust(nlights_calib_mean ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       nlights_calib_mean_1 +
                       nlights_mean_neighbor +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub)

#summary(nlights_calib_mean.h2.sub.pla)

# Run each model through pred.lm.int.r and tag the estimates with a Group label.
# NOTE(review): these are linear models of a continuous outcome, so "predicted
# probs" in the trailing comments is presumably shorthand for predicted changes.
nlights_calib_mean.h2.het.newsite1.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.het.newsite1.sub) #generate predicted probs
nlights_calib_mean.h2.het.newsite1.sub.dplot$Group <- "Actual presence" #label the type of estimate

nlights_calib_mean.h2.het.nonewsite1.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.het.nonewsite1.sub) #generate predicted probs
nlights_calib_mean.h2.het.nonewsite1.sub.dplot$Group <- "Actual presence" #label the type of estimate

nlights_calib_mean.h2.het.newsite2.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.het.newsite2.sub) #generate predicted probs
nlights_calib_mean.h2.het.newsite2.sub.dplot$Group <- "Actual presence" #label the type of estimate

nlights_calib_mean.h2.het.nonewsite2.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.het.nonewsite2.sub) #generate predicted probs
nlights_calib_mean.h2.het.nonewsite2.sub.dplot$Group <- "Actual presence" #label the type of estimate

# Placebo estimates; the same frame is reused in all four rbind() calls below
nlights_calib_mean.h2.sub.pla.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub.pla)
nlights_calib_mean.h2.sub.pla.dplot$Group <- "Placebo presence"

# Row order matters downstream: actual-presence rows come first, placebo rows last
plot.nlights_calib_mean.h2.het.newsite1.sub <- rbind(nlights_calib_mean.h2.het.newsite1.sub.dplot, #combine for ggplot
                                      nlights_calib_mean.h2.sub.pla.dplot)

plot.nlights_calib_mean.h2.het.nonewsite1.sub <- rbind(nlights_calib_mean.h2.het.nonewsite1.sub.dplot, #combine for ggplot
                                      nlights_calib_mean.h2.sub.pla.dplot)

plot.nlights_calib_mean.h2.het.newsite2.sub <- rbind(nlights_calib_mean.h2.het.newsite2.sub.dplot, #combine for ggplot
                                      nlights_calib_mean.h2.sub.pla.dplot)

plot.nlights_calib_mean.h2.het.nonewsite2.sub <- rbind(nlights_calib_mean.h2.het.nonewsite2.sub.dplot, #combine for ggplot
                                      nlights_calib_mean.h2.sub.pla.dplot)

@

% PRED PROBS FIGURE FOR H4a
<<Het_NewSiteH2H3, eval=TRUE, echo = FALSE, tidy=TRUE, fig.width = 12, fig.height = 11, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of new (within 1 year) versus established refugee presence (heterogeneous effects analysis) on conflict outcomes and nighttime lights (black), compared to their respective placebo estimates (gray). All point estimates include 95$\\%$ CIs.")>>=

# setwd("Paper_Inputs")
#
# Bring in RData from UCDP data
# load("plot.onset.h2.het.newsite1.sub.Rdata")
# load("plot.onset.h2.het.nonewsite1.sub.Rdata")
# load("plot.incidence.h2.het.newsite1.sub.Rdata")
# load("plot.incidence.h2.het.nonewsite1.sub.Rdata")

# Figure layout: four stacked rows (concentrated new, concentrated established,
# dispersed new, dispersed established), each holding four panels
# (onset/incidence, violent events, battle deaths, nighttime lights).
# "h2a" objects are the concentrated case and "h2b" objects the dispersed case.

# Tag every estimate frame with the outcome it belongs to (the x-axis variable)
plot.onset.h2.het.newsite1.sub$outcome <- "onset"
plot.incidence.h2.het.newsite1.sub$outcome <- "incidence"
plot.attack.h2.het.newsite1.sub$outcome <- "attack"
plot.best.h2.het.newsite1.sub$outcome <- "log(best + 1)"

plot.onset.h2.het.nonewsite1.sub$outcome <- "onset"
plot.incidence.h2.het.nonewsite1.sub$outcome <- "incidence"
plot.attack.h2.het.nonewsite1.sub$outcome <- "attack"
plot.best.h2.het.nonewsite1.sub$outcome <- "log(best + 1)"

# The nighttime-lights panels also map x = outcome, but these frames were never
# labelled here. Set the column explicitly so the panels do not depend on a
# stray global object called `outcome`. The tick label is overridden by
# scale_x_discrete(), so the value itself is never displayed.
# NOTE(review): assumes pred.lm.int.r does not already return an `outcome` column.
plot.nlights_calib_mean.h2.het.newsite1.sub$outcome <- "nlights"
plot.nlights_calib_mean.h2.het.nonewsite1.sub$outcome <- "nlights"

# Dodge the actual and placebo estimates so they do not overlap (dodge width 0.5)
pd <- position_dodge(0.5)

# Build one coefficient panel: point estimate plus CI bar per Group.
#   dat      estimate frame with columns outcome, Means, CIL, CIU and Group
#   y.lab    y-axis title
#   x.labs   tick labels for the discrete x axis, in level order
#   y.limits length-2 numeric vector of y-axis limits
#   percent  if TRUE, format the y axis as whole percentages
#   legend   if TRUE, draw the Group legend inside the top-right corner
het.newsite.panel <- function(dat, y.lab, x.labs, y.limits,
                              percent = FALSE, legend = FALSE) {
  y.scale <- if (percent) {
    scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                       limits = y.limits)
  } else {
    ylim(y.limits[1], y.limits[2])
  }

  legend.theme <- if (legend) {
    theme(legend.title = element_blank(),
          legend.justification = c(1, 1),
          legend.position = c(.99, .99),
          legend.box = "horizontal",
          legend.direction = "vertical",
          legend.key = element_rect(colour = "transparent", fill = "white"))
  } else {
    theme(legend.position = "none")
  }

  ggplot(dat, aes(x = outcome, y = Means,
                  group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # CIL/CIU are coerced via character, as in the original code, in case the
    # helper returned them as factors or strings
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylab(y.lab) +
    xlab("") +
    scale_x_discrete(labels = x.labs) +
    y.scale +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") + # "none" replaces the deprecated shape = FALSE
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)) +
    legend.theme
}

# Stack the onset and incidence rows that share one Plot id (3 is used for the
# concentrated panels, 6 for the dispersed panels) and put "onset" first on the x axis.
het.newsite.probrows <- function(onset.dat, incidence.dat, plot.id) {
  bind_rows(onset.dat[onset.dat$Plot == plot.id, ],
            incidence.dat[incidence.dat$Plot == plot.id, ]) %>%
    mutate(outcome = fct_relevel(outcome, "onset"))
}

# Outcome-specific wrappers: each fixes the axis title, tick labels and limits
het.newsite.prob <- function(onset.dat, incidence.dat, plot.id, legend = FALSE) {
  het.newsite.panel(het.newsite.probrows(onset.dat, incidence.dat, plot.id),
                    y.lab = "Change in Predicted Probability",
                    x.labs = c("Onset", "Incidence"),
                    y.limits = c(-.12, .12),
                    percent = TRUE, legend = legend)
}

het.newsite.attack <- function(dat) {
  het.newsite.panel(dat,
                    y.lab = "Change in Predicted Number",
                    x.labs = c("Violent Events"),
                    y.limits = c(-9, 9))
}

het.newsite.best <- function(dat) {
  het.newsite.panel(dat,
                    y.lab = "Change in Predicted Number (logged)",
                    x.labs = c("Battle Deaths"),
                    y.limits = c(-.8, .8))
}

het.newsite.nlights <- function(dat) {
  het.newsite.panel(dat,
                    y.lab = "Change in Predicted Lights",
                    x.labs = c("Average Nighttime Lights"),
                    y.limits = c(-.25, .25))
}

# Combine four panels into one titled row. patchwork applies ggtitle()/theme()
# to the last panel added, so hjust values above 1 are used to drag the title
# back across the row; the per-row values are hand-tuned and kept as they were.
het.newsite.patchrow <- function(p.prob, p.attack, p.best, p.nlights,
                                 title, title.hjust) {
  (p.prob + p.attack + p.best + p.nlights +
     plot_layout(widths = c(2, 1, 1, 1))) +
    ggtitle(title) +
    theme(plot.title = element_text(hjust = title.hjust))
}

# Rows of the attack/best/nlights frames: rows 1 and 3 feed the concentrated
# (h2a) panels, rows 2 and 4 the dispersed (h2b) panels.
het.newsite.rows.h2a <- c(1, 3)
het.newsite.rows.h2b <- c(2, 4)

# NEW SITE: h2a is concentrated, h2b is dispersed
PLOT.onsetincidence.h2a.het.newsite1.sub <-
  het.newsite.prob(plot.onset.h2.het.newsite1.sub,
                   plot.incidence.h2.het.newsite1.sub,
                   plot.id = 3, legend = TRUE) # only panel that carries the legend

PLOT.onsetincidence.h2b.het.newsite1.sub <-
  het.newsite.prob(plot.onset.h2.het.newsite1.sub,
                   plot.incidence.h2.het.newsite1.sub,
                   plot.id = 6)

PLOT.attack.h2a.het.newsite1.sub <-
  het.newsite.attack(plot.attack.h2.het.newsite1.sub[het.newsite.rows.h2a, ])
PLOT.attack.h2b.het.newsite1.sub <-
  het.newsite.attack(plot.attack.h2.het.newsite1.sub[het.newsite.rows.h2b, ])

PLOT.best.h2a.het.newsite1.sub <-
  het.newsite.best(plot.best.h2.het.newsite1.sub[het.newsite.rows.h2a, ])
PLOT.best.h2b.het.newsite1.sub <-
  het.newsite.best(plot.best.h2.het.newsite1.sub[het.newsite.rows.h2b, ])

PLOT.nlights_calib_mean.h2a.het.newsite1.sub <-
  het.newsite.nlights(plot.nlights_calib_mean.h2.het.newsite1.sub[het.newsite.rows.h2a, ])
PLOT.nlights_calib_mean.h2b.het.newsite1.sub <-
  het.newsite.nlights(plot.nlights_calib_mean.h2.het.newsite1.sub[het.newsite.rows.h2b, ])

# NO NEW SITE: h2a is concentrated, h2b is dispersed
PLOT.onsetincidence.h2a.het.nonewsite1.sub <-
  het.newsite.prob(plot.onset.h2.het.nonewsite1.sub,
                   plot.incidence.h2.het.nonewsite1.sub,
                   plot.id = 3)

PLOT.onsetincidence.h2b.het.nonewsite1.sub <-
  het.newsite.prob(plot.onset.h2.het.nonewsite1.sub,
                   plot.incidence.h2.het.nonewsite1.sub,
                   plot.id = 6)

PLOT.attack.h2a.het.nonewsite1.sub <-
  het.newsite.attack(plot.attack.h2.het.nonewsite1.sub[het.newsite.rows.h2a, ])
PLOT.attack.h2b.het.nonewsite1.sub <-
  het.newsite.attack(plot.attack.h2.het.nonewsite1.sub[het.newsite.rows.h2b, ])

PLOT.best.h2a.het.nonewsite1.sub <-
  het.newsite.best(plot.best.h2.het.nonewsite1.sub[het.newsite.rows.h2a, ])
PLOT.best.h2b.het.nonewsite1.sub <-
  het.newsite.best(plot.best.h2.het.nonewsite1.sub[het.newsite.rows.h2b, ])

PLOT.nlights_calib_mean.h2a.het.nonewsite1.sub <-
  het.newsite.nlights(plot.nlights_calib_mean.h2.het.nonewsite1.sub[het.newsite.rows.h2a, ])
PLOT.nlights_calib_mean.h2b.het.nonewsite1.sub <-
  het.newsite.nlights(plot.nlights_calib_mean.h2.het.nonewsite1.sub[het.newsite.rows.h2b, ])

# Assemble the four rows
patch1.newsite1 <- het.newsite.patchrow(PLOT.onsetincidence.h2a.het.newsite1.sub,
                                        PLOT.attack.h2a.het.newsite1.sub,
                                        PLOT.best.h2a.het.newsite1.sub,
                                        PLOT.nlights_calib_mean.h2a.het.newsite1.sub,
                                        title = "Effect of Concentrated New Presence in Past Year",
                                        title.hjust = 4.3)

patch1.nonewsite1 <- het.newsite.patchrow(PLOT.onsetincidence.h2a.het.nonewsite1.sub,
                                          PLOT.attack.h2a.het.nonewsite1.sub,
                                          PLOT.best.h2a.het.nonewsite1.sub,
                                          PLOT.nlights_calib_mean.h2a.het.nonewsite1.sub,
                                          title = "Effect of Concentrated Established Presence",
                                          title.hjust = 5.35)

patch2.newsite1 <- het.newsite.patchrow(PLOT.onsetincidence.h2b.het.newsite1.sub,
                                        PLOT.attack.h2b.het.newsite1.sub,
                                        PLOT.best.h2b.het.newsite1.sub,
                                        PLOT.nlights_calib_mean.h2b.het.newsite1.sub,
                                        title = "Effect of Dispersed New Presence in Past Year",
                                        title.hjust = 4.9)

patch2.nonewsite1 <- het.newsite.patchrow(PLOT.onsetincidence.h2b.het.nonewsite1.sub,
                                          PLOT.attack.h2b.het.nonewsite1.sub,
                                          PLOT.best.h2b.het.nonewsite1.sub,
                                          PLOT.nlights_calib_mean.h2b.het.nonewsite1.sub,
                                          title = "Effect of Dispersed Established Presence",
                                          title.hjust = 6.35)

# Stack the rows vertically; this top-level expression is what knitr renders
patch1.newsite1/
patch1.nonewsite1/
patch2.newsite1/
patch2.nonewsite1

@


% LARGE VS. SMALL (reference SI, regression model results)
Second, although we do not have refugee population data for our entire panel, as described in Section \ref{sec:popdata}, for African countries from 2010 to 2015, we have total population numbers across provinces. Here, we distinguish between large and small refugee presence by coding refugee-hosting provinces that have a 5\% ratio or greater of refugee population compared to the local population as ``large'' and hosting provinces under 5\% as ``small.'' In SI Section \ref{SIsec:hte_largesmall}, we find that there is no effect of large relative populations on conflict. However, there is a positive effect of small populations on conflict. This may be due to small populations not bringing developmental benefits and being easier targets for victimization. Future research should further interrogate this finding.

% CAMPS VS. SETTLEMENTS (reference SI)
Third, we predict that the negative effects on conflict onset and incidence and the positive effects on infrastructure in geographically concentrated cases of refugee site presence should be stronger when those sites are camps. In SI Section \ref{SIsec:hte_campssettlements}, we descriptively show, of the \Sexpr{sum(geddatasub$rtb)} province-years that host refugee sites, the share of province-years with only formal refugee camps (\Sexpr{(sum(geddatasub[geddatasub$rsb == 0,]$rcb)/sum(geddatasub$rtb))*100}\%), only informal refugee settlements (\Sexpr{(sum(geddatasub[geddatasub$rcb == 0,]$rsb)/sum(geddatasub$rtb))*100}\%), or both (\Sexpr{(sum(geddatasub[geddatasub$rcb == 1,]$rsb)/sum(geddatasub$rtb))*100}\%) across time. Confirming our expectations in H4, SI Figures \ref{fig:Het_CampsSettH1} and \ref{fig:Het_CampsSettH2} show that the \emph{conditional risk reduction effect} on conflict outcomes is moderately driven by camps.

With respect to the policy implications of these findings -- that formal camps (as opposed to settlements) are more secure -- the UNHCR \textit{does not} advocate for the camp-based model of hosting. In camps, refugees are effectively institutionalized. Multiple officials acknowledged that host governments generally prefer camps for security and development reasons: ``We would like no big concentrations of refugee populations in a place and yet, still the countries prefer a camp style arrangement for various reasons. Some of them might say that `I want more efficiency of programs, I want to make sure that there is no burden on my budgets so I can geographically distinct them, I want for security reasons to control the movements.'\,''\footnote{Senior Official at Kakuma Refugee Camp, interview conducted on July 23, 2018.} And ``In camps, more money is spent per capita on refugees, and there is increased ability to deliver assistance. Camps are more cost effective and provide physical security and services, but they are not meant for people to stay there forever.''\footnote{Senior Regional Protection Officer, interview conducted on June 25, 2018.}

% NEAR VS. FAR BORDER (reference SI)
Next, SI Section \ref{SIsec:hte_nearborder} examines heterogeneous effects by whether provinces are near (within 100km) or far (outside 100km) from the nearest international border. We find negative effects for provinces in the interior of countries (far from border) on onset and incidence. Likewise, in the cases of concentrated refugee presence for provinces in the interior, the negative effects on conflict and positive effects on nighttime lights are prominent. Although refugees in traditionally peripheral, border areas may present more opportunities for development, it may be the case that the state and organizations like UNHCR are able to more effectively respond to communities in the country's interior, where there are likely more established urban centers. 

% BY REGION (reference SI)
Lastly, SI Section \ref{SIsec:hte_region} shows subgroup effects of refugee presence on conflict by region of the world. For most regions, we observe a null effect. However, we observe a statistically significant \textit{positive effect} of refugee presence on battle deaths in Southern Asia -- Afghanistan, Bangladesh, India, Iran, Nepal, and Pakistan. In contrast, we observe a statistically significant negative effect of refugee presence on battle deaths in Western Asia, which includes countries that host Syrian refugees, i.e., Turkey, Jordan, and Iraq. These findings prompt additional micro-level research that is outside the scope of this paper. Nevertheless, they are useful for us to further probe the overall null effect; most regions observe a null effect, while in other areas, namely Southern Asia and Western Asia, the global null results mask positive and negative effects.


\subsection*{Robustness Checks}
\label{sec:robustness}

We perform the following robustness checks, in addition to those previously mentioned. First, in SI Section \ref{SIsec:conflicttypes}, we examine sub-types of conflict intensity measures: state-based, non-state, and one-sided violent events and logged battle deaths. We show that there is no effect of refugee presence on the various sub-types of conflict (H1), and that there are negative effects of concentrated refugee presence on several of the conflict types, state-based and one-sided violent events and state-based battle deaths, while null effects for dispersed refugee presence (H2). These are all consistent with our theoretical expectations.

Second, we rerun our analyses dropping one country at a time and dropping one region at a time. We confirm that our results are not driven by any outlier countries or regions (SI Section \ref{SIsec:DropCountryAnalysis} and Section \ref{SIsec:DropRegionAnalysis}). Third, we repeat the analyses using the full dataset that encompasses all countries, as opposed to subsetting down to only countries who have ever hosted refugees during the study period, and results do not substantively change (SI Section \ref{SIsec:Full}). 

Fourth, we use a modified \textit{Dynamic Subset Data} to address concerns over possible collider bias, which can occur in analyses that condition on variables causally determined by both the treatment and the outcome \citep{Morgan:2014}. With this modified dataset, at time $t$, we only include the subset of countries that have already hosted refugees in the past; again, results do not substantively change (SI Section \ref{SIsec:DynamicSubsetAnalysis}). Fifth, we lead the outcome variables by 5 years to show that there are no delayed positive effects of the presence of refugee sites on conflict (SI Section \ref{SIsec:Lead5Analysis}). Finally, when we use wzoneData \citep{Kikuta:2020} to generate conflict onset and incidence variables, SI Section \ref{SIsec:wzone} shows that the main results do not meaningfully change.


\section*{Conclusion}
\label{sec:conclusion} 

% PROVIDE SUMMARY 
This study challenges and extends existing research on conflict and conflict-affected migration. We provide quantitative global analysis at a subnational level using geo-referenced data on displacement sites covering the past three decades. This period has been marked by unprecedented levels of displacement. Our findings strongly and consistently point to the lack of any conflict risk attributable to the presence of refugee communities. By bringing into this debate recent studies on the positive effects of refugees on host communities, additional analysis examining nighttime lights, and knowledge from experts who have worked in this field around the world for decades, this research contends that the mix of positive and negative effects of refugee settlement will generally leave security conditions unaffected, and in some cases, improve them. We show that under certain conditions -- when refugee sites are geographically concentrated in a country, when their presence has had time to generate a new equilibrium, or when refugees are relatively large in number -- the risk of conflict decreases substantially. Moreover, we find suggestive evidence that this \emph{conditional risk reduction effect} is due to increased development resulting from economic activity, aid, and infrastructure within these areas. We discuss possible selection bias and address it using placebo tests and matching. Our findings stand in stark opposition to much of the existing literature. Crucially, we provide an empirical bulwark against growing, often politically-driven, efforts to turn refugees away, discriminate against them or oppress them for fear of jeopardizing domestic security. This fear is unfounded. 

% FUTURE RESEARCH
We propose several avenues for future research. First, a notable limitation to this study is the lack of data on displacement site characteristics, particularly with respect to population numbers that spans all refugee-hosting countries, nationalities of origin, ethnic breakdown, and levels of aid and state involvement. Such data would help unpack how the size of refugee communities affects conflict risk and other development outcomes. Furthermore, this would allow researchers to test the micro-level mechanisms discussed in the literature. Second, while this paper examines civil conflict outcomes, future research can expand to lower level contentious events, such as protests and smaller-scale communal violence. Third, other iterations of this research can explicitly model diffusion effects; how far do infrastructural improvements and risk reduction extend? We encourage more exploration of this potential risk reduction effect through case studies and, for aid organizations that are implementing programs aimed at improving refugee-host relations, through experimental work. 

% IMPLICATIONS
Our study has critical implications for current migration debates. As the global refugee crisis grows each year, it is imperative for scholars, policymakers, and the public to resist alarmist calls that associate hosting refugees with conflict, especially in order to justify restrictions on refugee settlement and other anti-migrant policies. Our findings also provide insights to humanitarian agencies working with refugee populations. Although we empirically show that geographically concentrated sites experience lower conflict risk and greater infrastructure, this does not imply that host governments should relocate refugees in their country into geographically clustered camps. As we discussed, there may be little ability to do so given how quickly exigent crises lead people to migrate. More importantly, the UNHCR and other humanitarian organizations do not recommend hosting refugees in camps; camps are adequate as temporary solutions, but they become dehumanizing when individuals remain in them for years or even decades. Instead, the policy-relevant lessons that we can draw from our findings are for humanitarian agencies and donors to prioritize local development of infrastructure and state capacity, and to ensure that host communities can also benefit from aid. We recognize that host states face many challenges, especially since refugees often settle in the most remote and underdeveloped regions of the country. By showing how local host communities can benefit from hosting, our research ultimately hopes to encourage greater accommodation and integration of refugees.

\newpage

\section*{Disclaimers}
The authors declare the human subjects research in this article was deemed exempt from review by the Princeton University Institutional Review Board. The authors declare no ethical issues or conflicts of interest in this research. Research documentation and/or data that support the findings of this study are openly available in the APSR Dataverse at XXX. Limitations on certain data availability are discussed in the SI and replication files.

\newpage

\setstretch{1}
\bibliography{refugeeconflict}

\end{document}
